org.jpedal.utils.StringUtils Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of jsign-jpedal Show documentation
JPedal fork
The newest version!
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/java-pdf-library-support/
 *
 * (C) Copyright 1997-2013, IDRsolutions and Contributors.
 *
 * 	This file is part of JPedal
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * StringUtils.java
 * ---------------
 */
package org.jpedal.utils;

import java.io.UnsupportedEncodingException;

import org.jpedal.PdfDecoder;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.io.TextTokens;
import org.jpedal.parser.DecoderOptions;

public class StringUtils {

	private static final int aInt = 97;
	private static final int zeroInt = 48;
	private static final int nineInt = 57;
	private static final int openSquareBracketInt = 91;
	private static final int closeSquareBracketInt = 93;
	private static final int openCurlyBracket = 40;
	private static final int closeCurlyBracket = 41;
	private static final int backSlashInt = 92;
	private static final int forwardSlashInt = 47;
	private static final int hashInt = 35;
	private static final int divideInt = 247;
	private static final int fullStopInt = 46;
	private static final int spaceInt = 32;
	private static final int percentInt = 37;
	private static final int minusInt = 45;
	private static final int underScoreInt = 95;
	private static final int backSlachInt = 92;
	private static final int nInt = 110;
	private static final int newLineInt = 10;
	private static final int plusInt = 43;
	private static final int pInt = 112;
	private static final int colonInt = 58;
	private static final int equalsInt = 61;
	private static final int cInt = 99;
	private static final int qInt = 113;

	private static String enc;

	static {
		enc = System.getProperty("file.encoding");

		if (enc.equals("UTF-8") || enc.equals("MacRoman") || enc.equals("Cp1252")) {
			// fine carry on
		}
		else
			if (DecoderOptions.isRunningOnMac) enc = "MacRoman";
			else
				if (DecoderOptions.isRunningOnWindows) enc = "Cp1252";
				else enc = "UTF-8";
	}

	/**
	 * quick code to make text lower case
	 */
	public static String toLowerCase(String str) {

		int len = str.length();
		char c;
		char[] chars = str.toCharArray();

		// strip out any odd codes
		boolean isChanged = false;
		for (int jj = 0; jj < len; jj++) {
			c = chars[jj];

			// ensure lower case and flip if not
			if (c > 64 && c < 91) {
				c = (char) (c + 32);
				chars[jj] = c;
				isChanged = true;
			}
		}

		if (isChanged) return String.copyValueOf(chars, 0, len);
		else return str;
	}

	public static String toUpperCase(String str) {

		int len = str.length();
		char c;
		char[] chars = str.toCharArray();

		// strip out any odd codes
		boolean isChanged = false;
		for (int jj = 0; jj < len; jj++) {
			c = chars[jj];

			// ensure UPPER case and flip if not
			if (c > 96 && c < 123) {
				c = (char) (c - 32);
				chars[jj] = c;
				isChanged = true;
			}
		}

		if (isChanged) return String.copyValueOf(chars, 0, len);
		else return str;
	}

	static final public String handleEscapeChars(String value) {
		// deal with escape characters
		int escapeChar = value.indexOf(backSlachInt);

		while (escapeChar != -1) {
			char c = value.charAt(escapeChar + 1);
			if (c == nInt) {
				c = newLineInt;
			}
			else {}

			value = value.substring(0, escapeChar) + c + value.substring(escapeChar + 2, value.length());

			escapeChar = value.indexOf(backSlachInt);
		}
		return value;
	}

	/**
	 * turn any hex values (ie #e4) into chars
	 * 
	 * @param value
	 */
	static final public String convertHexChars(String value) {

		// avoid null
		if (value == null) return value;

		// find char
		int escapeChar = value.indexOf(hashInt);

		if (escapeChar == -1) return value;

		// process
		StringBuilder newString = new StringBuilder();
		int length = value.length();
		// newString.setLength(length);

		char c;

		for (int ii = 0; ii < length; ii++) {
			c = value.charAt(ii);

			if (c == hashInt) {
				ii++;
				int end = ii + 2;
				if (end > length) end = length;
				String key = value.substring(ii, end);

				c = (char) Integer.parseInt(key, 16);

				ii++;

				if (c != spaceInt) newString.append(c);
			}
			else newString.append(c);

		}

		return newString.toString();
	}

	/**
	 * check to see if the string contains anything other than '-' '0-9' '.' if so then its not a number.
	 */
	public static boolean isNumber(String textString) {
		byte[] data = StringUtils.toBytes(textString);
		int strLength = data.length;
		boolean isNumber = true;

		// assume true and disprove
		for (int j = 0; j < strLength; j++) {
			if ((data[j] >= zeroInt && data[j] <= nineInt) || data[j] == fullStopInt || (j == 0 && data[j] == minusInt)) { // assume and disprove
			}
			else {
				isNumber = false;
				// exit loop
				j = strLength;
			}
		}

		return isNumber;
	}

	/** removes the specified index from the array and reduces the array by 1 in length */
	public static String[] remove(String[] fields, int i) {
		if (i < 0 || i > fields.length) return fields;

		String[] retArray = new String[fields.length - 1];
		int r = 0;
		for (int f = 0; f < fields.length; f++) {
			if (f == i) continue;
			retArray[r++] = fields[f];
		}
		return retArray;
	}

	/**
	 * public static void main(String[] args){ //add characters here to get int UNIVERSAL equivalents. char[] chrs = new char[]{'(',')'}; for (int i =
	 * 0; i < chrs.length; i++) { System.out.println(chrs[i]+" ="+((int)chrs[i])); } }/
	 **/

	/**
	 * replaces all spaces ' ' with underscores '_' to allow the whole name to be used in HTML
	 * 
	 */
	public static String makeHTMLNameSafe(String name) {

		if (name == null || name.length() == 0) return name;

		char[] chrs = name.toCharArray();

		// replace any dodgy chars
		if (name.indexOf(percentInt) != -1 || name.indexOf(spaceInt) != -1 || name.indexOf(fullStopInt) != -1 || name.indexOf(plusInt) != -1
				|| name.indexOf(colonInt) != -1 || name.indexOf(equalsInt) != -1 || name.indexOf(forwardSlashInt) != -1
				|| name.indexOf(backSlashInt) != -1) {
			// NOTE: if you add any more please check with main method above for int values and DONT use char
			// strings as they are not cross platform. search for 'UNIVERSAL equivalents' to find main method.
			for (int i = 0; i < chrs.length; i++) {
				switch (chrs[i]) {

					case spaceInt:
						chrs[i] = underScoreInt;
						break;

					case fullStopInt:
						chrs[i] = minusInt;
						break;

					// replace & with safe char as images break if in path ?? ANY IDEA WHAT THIS LINE IS??
					case percentInt:
						chrs[i] = underScoreInt;
						break;

					case plusInt:
						chrs[i] = pInt;
						break;

					case colonInt:
						chrs[i] = cInt;
						break;

					case equalsInt:
						chrs[i] = qInt;
						break;

					case forwardSlashInt:
						chrs[i] = underScoreInt;
						break;

					case backSlashInt:
						chrs[i] = underScoreInt;
						break;
				}
			}
		}

		char[] testchrs = new char[] { openSquareBracketInt, closeSquareBracketInt, hashInt, divideInt, openCurlyBracket, closeCurlyBracket };
		int count = 0;
		for (char chr1 : chrs) {
			for (char testchr : testchrs) {
				if (chr1 == testchr) count++;
			}
		}

		if (count > 0) {
			int c = 0;
			char[] tmp = new char[chrs.length - count];
			MAINLOOP: for (char chr : chrs) {
				for (char testchr : testchrs) {
					if (chr == testchr) continue MAINLOOP;
				}
				tmp[c++] = chr;
			}
			chrs = tmp;

		}

		if (chrs[0] >= zeroInt && chrs[0] <= nineInt) {
			char[] tmp = new char[chrs.length + 1];
			System.arraycopy(chrs, 0, tmp, 1, chrs.length);
			tmp[0] = aInt;
			chrs = tmp;
		}

		name = new String(chrs);

		return name;
	}

	/**
	 * read a text String held in fieldName in string
	 */
	public static String getTextString(byte[] rawText, boolean keepReturns) {

		String returnText = "";

		// make sure encoding loaded
		StandardFonts.checkLoaded(StandardFonts.PDF);

		char[] chars = null;
		if (rawText != null) chars = new char[rawText.length * 2];
		int ii = 0;
		char nextChar;

		TextTokens rawChars = new TextTokens(rawText);

		// test to see if unicode
		if (rawChars.isUnicode()) {
			// its unicode
			while (rawChars.hasMoreTokens()) {
				nextChar = rawChars.nextUnicodeToken();
				if (nextChar == 9) {
					chars[ii] = 32;
					ii++;
				}
				else
					if (nextChar > 31 || (keepReturns && (nextChar == 10 || nextChar == 13))) {
						chars[ii] = nextChar;
						ii++;
					}
			}

		}
		else {
			// pdfDoc encoding

			while (rawChars.hasMoreTokens()) {
				nextChar = rawChars.nextToken();

				String c = null;
				if (nextChar == 9) {
					c = " ";
				}
				else
					if (keepReturns && (nextChar == 10 || nextChar == 13)) {
						c = String.valueOf(nextChar);
					}
					else
						if (nextChar > 31 && nextChar < 253) {
							c = StandardFonts.getEncodedChar(StandardFonts.PDF, nextChar);
						}

				if (c != null) {
					int len = c.length();

					// resize if needed
					if (ii + len >= chars.length) {
						char[] tmp = new char[len + ii + 10];
						System.arraycopy(chars, 0, tmp, 0, chars.length);
						chars = tmp;
					}

					// add values
					for (int i = 0; i < len; i++) {
						chars[ii] = c.charAt(i);
						ii++;
					}
				}
			}
		}

		if (chars != null) {
			returnText = String.copyValueOf(chars, 0, ii);
			// fix for certain PDFS using the HEX digits of a UTF-16 encoded String to store the string
			returnText = decodeUTF16String(returnText);
		}

		return returnText;
	}

	/**
	 * Decodes a String that is in the UTF16 format

	 * Example Input: FEFF0041007500730074007200690061

	 * Example Output: Austria
	 * 
	 * @param source
	 *            The UTF16 String
	 */
	public static String decodeUTF16String(String source) {
		String copy = source.toUpperCase();
		if (copy.length() >= 4 && copy.length() % 2 == 0 && copy.startsWith("FEFF") && copy.matches("[0-9A-F]+")) {
			copy = copy.substring(4);
			byte content[] = new byte[copy.length() / 2];
			for (int i = 0; i < copy.length(); i += 2) {
				content[i / 2] = (byte) Integer.parseInt(copy.substring(i, i + 2), 16);
			}

			StringBuilder newString = new StringBuilder(content.length / 2);
			for (int i = 0; i < content.length; i++) {
				if (content[i] != 0) {
					newString.append((char) content[i]);
				}
			}

			return newString.toString();
		}
		else {
			return source;
		}
	}

	public static String replaceAllManual(String string, int find, String replace) {
		int index = string.indexOf(find);
		while (index != -1) {
			string = string.substring(0, index) + replace + string.substring(index + 1);
			index = string.indexOf(find);
		}
		return string;
	}

	public static String correctSpecialChars(String string) {
		for (int i = 0; i < string.length(); i++) {
			switch (string.charAt(i)) {
				case 225:
					string = replaceAllManual(string, 225, "á");
					break;
				case 224:
					string = replaceAllManual(string, 224, "à");
					break;
				case 226:
					string = replaceAllManual(string, 226, "â");
					break;
				case 229:
					string = replaceAllManual(string, 229, "å");
					break;
				case 227:
					string = replaceAllManual(string, 227, "ã");
					break;
				case 228:
					string = replaceAllManual(string, 228, "ä");
					break;
				case 230:
					string = replaceAllManual(string, 230, "æ");
					break;
				case 231:
					string = replaceAllManual(string, 231, "ç");
					break;
				case 233:
					string = replaceAllManual(string, 233, "é");
					break;
				case 232:
					string = replaceAllManual(string, 232, "è");
					break;
				case 234:
					string = replaceAllManual(string, 234, "ê");
					break;
				case 235:
					string = replaceAllManual(string, 235, "ë");
					break;
				case 237:
					string = replaceAllManual(string, 237, "í");
					break;
				case 236:
					string = replaceAllManual(string, 236, "ì");
					break;
				case 238:
					string = replaceAllManual(string, 238, "î");
					break;
				case 239:
					string = replaceAllManual(string, 239, "ï");
					break;
				case 241:
					string = replaceAllManual(string, 241, "ñ");
					break;
				case 243:
					string = replaceAllManual(string, 243, "ó");
					break;
				case 242:
					string = replaceAllManual(string, 242, "ò");
					break;
				case 244:
					string = replaceAllManual(string, 244, "ô");
					break;
				case 248:
					string = replaceAllManual(string, 248, "ø");
					break;
				case 245:
					string = replaceAllManual(string, 245, "õ");
					break;
				case 246:
					string = replaceAllManual(string, 246, "ö");
					break;
				case 223:
					string = replaceAllManual(string, 223, "ß");
					break;
				case 250:
					string = replaceAllManual(string, 250, "ú");
					break;
				case 249:
					string = replaceAllManual(string, 249, "ù");
					break;
				case 251:
					string = replaceAllManual(string, 251, "û");
					break;
				case 252:
					string = replaceAllManual(string, 252, "ü");
					break;
				case 255:
					string = replaceAllManual(string, 255, "ÿ");
					break;
				case 8217:
					string = replaceAllManual(string, 8217, "'");
					break;
			// to find other codes check out http://www.interfacebus.com/html_escape_codes.html
			}
		}

		return string;
	}

	public static byte[] toBytes(String value) {

		byte[] data = null;

		try {
			data = value.getBytes(enc);

		}
		catch (UnsupportedEncodingException e) {
			// tell user and log
			if (LogWriter.isOutput()) LogWriter.writeLog("Exception: " + e.getMessage());
		}
		return data;
	}

	/**
	 * Replaces illegal characters that aren't allowed in code
	 * 
	 * @param S
	 *            String to have characters replaced in
	 * @return A safe String that can be used as a Java or Javascript variable or function
	 */
	public static String makeMethodSafe(String S) {
		String name = makeHTMLNameSafe(S);
		name = name.replace("-", "_");
		return name;
	}

	/**
	 * Replaces all illegal characters as defined by ses the standard UNICODE Consortium character repertoire. This means it strips out characters
	 * between: 0 to 31 inclusive and 127 to 159 inclusive.
	 * 
	 * @param S
	 */
	public static String stripIllegalCharacters(String S) {
		String newString = "";
		for (int i = 0; i < S.length(); i++) {
			char ch = S.charAt(i);
			if ((ch < 32 && ch >= 0) || (ch > 126 && ch < 160)) {
				continue;
			}
			newString += ch;
		}
		return newString;
	}
}