
// org.jpedal.utils.StringUtils Maven / Gradle / Ivy
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2015 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ---------------
* StringUtils.java
* ---------------
*/
package org.jpedal.utils;
import java.io.UnsupportedEncodingException;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.io.TextTokens;
import org.jpedal.parser.DecoderOptions;
public class StringUtils {
// Character codes used by the methods of this class. They are held as int
// constants so they can serve as switch labels and indexOf arguments.
private static final int ampersand = '&';
private static final int ampersandInt = 'A'; //despite the name this is the replacement written in place of '&': capital A, as it is not an escaped char
private static final int aInt = 97; // 'a'
private static final int zeroInt = 48; // '0'
private static final int nineInt = 57; // '9'
private static final int openSquareBracketInt = 91; // '['
private static final int closeSquareBracketInt = 93; // ']'
private static final int openCurlyBracket = 40; // NOTE: 40 is '(' (a round bracket), not a curly bracket
private static final int closeCurlyBracket = 41; // NOTE: 41 is ')' (a round bracket), not a curly bracket
private static final int backSlashInt = 92; // backslash
private static final int forwardSlashInt = 47; // '/'
private static final int hashInt = 35; // '#'
private static final int divideInt = 247; // division sign (U+00F7)
private static final int fullStopInt = 46; // '.'
private static final int spaceInt = 32; // ' '
private static final int percentInt = 37; // '%'
private static final int minusInt = 45; // '-'
private static final int underScoreInt = 95; // '_'
// private final static int backSlachInt = 92;
// private final static int nInt = 110;
// private final static int newLineInt = 10;
private static final int plusInt = 43; // '+'
private static final int pInt = 112; // 'p'
private static final int colonInt = 58; // ':'
private static final int equalsInt = 61; // '='
private static final int cInt = 99; // 'c'
private static final int qInt = 113; // 'q'
/**
 * Encoding name selected once, at class-load time. It is the value of the
 * {@code file.encoding} system property when that is one of UTF-8, MacRoman
 * or Cp1252; otherwise a default is chosen from the platform flags in
 * {@link DecoderOptions}.
 */
private static String enc;

static {
    final String platformEncoding = System.getProperty("file.encoding");

    // Constant-first equals keeps this null-safe: a missing property must not
    // abort class initialisation with a NullPointerException.
    if ("UTF-8".equals(platformEncoding)
            || "MacRoman".equals(platformEncoding)
            || "Cp1252".equals(platformEncoding)) {
        //fine carry on with the platform value
        enc = platformEncoding;
    } else if (DecoderOptions.isRunningOnMac) {
        enc = "MacRoman";
    } else if (DecoderOptions.isRunningOnWindows) {
        enc = "Cp1252";
    } else {
        enc = "UTF-8";
    }
}
/**
* turn any hex values (ie #e4) into chars
* @param value
* @return
*/
public static final String convertHexChars(final String value) {
//avoid null
if(value==null) {
return value;
}
//find char
final int escapeChar=value.indexOf(hashInt);
if(escapeChar==-1) {
return value;
}
//process
final StringBuilder newString=new StringBuilder();
final int length=value.length();
//newString.setLength(length);
char c;
for(int ii=0;iilength) {
end=length;
}
final String key=value.substring(ii,end);
c=(char)Integer.parseInt(key,16);
ii++;
if(c!=spaceInt) {
newString.append(c);
}
}else {
newString.append(c);
}
}
return newString.toString();
}
/** check to see if the string contains anything other than
* '-' '0-9' '.'
* if so then its not a number.
*/
public static boolean isNumber(final String textString) {
final byte[] data=StringUtils.toBytes(textString);
final int strLength=data.length;
boolean isNumber=true;
//assume true and disprove
for(int j=0;j=zeroInt && data[j] <=nineInt)|| data[j]==fullStopInt
|| (j==0 && data[j]==minusInt)){ //assume and disprove
}else{
isNumber=false;
//exit loop
j=strLength;
}
}
return isNumber;
}
/**
 * Rewrites a name so the whole of it can be used in HTML.
 * <ul>
 * <li>'&amp;' becomes 'A'</li>
 * <li>space, '%', '/' and backslash become '_'</li>
 * <li>'.' becomes '-'</li>
 * <li>'+' becomes 'p', ':' becomes 'c', '=' becomes 'q'</li>
 * <li>'[', ']', '#', '(', ')' and the division sign (code 247) are removed</li>
 * <li>if the result starts with a digit it is prefixed with 'a'</li>
 * </ul>
 * Every replacement is applied unconditionally, so '&amp;' is replaced even
 * when it is the only special char in the name. A name made up solely of
 * removed chars yields an empty string.
 *
 * @param name the raw name; may be null or empty
 * @return the safe name, or {@code name} itself when it is null or empty
 */
public static String makeHTMLNameSafe(String name) {

    if (name == null || name.isEmpty()) {
        return name;
    }

    final int length = name.length();

    //one spare slot for the possible leading 'a'
    final StringBuilder safe = new StringBuilder(length + 1);

    //NOTE: if you add any more cases please use the int constants declared at
    //the top of this class and DONT use char strings.
    for (int i = 0; i < length; i++) {
        final char current = name.charAt(i);

        switch (current) {
            //replace & with safe char as images break if in path
            case ampersand:
                safe.append((char) ampersandInt);
                break;

            case spaceInt:
            case percentInt:
            case forwardSlashInt:
            case backSlashInt:
                safe.append((char) underScoreInt);
                break;

            case fullStopInt:
                safe.append((char) minusInt);
                break;

            case plusInt:
                safe.append((char) pInt);
                break;

            case colonInt:
                safe.append((char) cInt);
                break;

            case equalsInt:
                safe.append((char) qInt);
                break;

            //these chars are dropped altogether
            case openSquareBracketInt:
            case closeSquareBracketInt:
            case hashInt:
            case divideInt:
            case openCurlyBracket:
            case closeCurlyBracket:
                break;

            default:
                safe.append(current);
                break;
        }
    }

    //a leading digit gets an 'a' in front of it; the length check guards
    //against a name where every char was dropped above
    if (safe.length() > 0 && safe.charAt(0) >= zeroInt && safe.charAt(0) <= nineInt) {
        safe.insert(0, (char) aInt);
    }

    return safe.toString();
}
/**
* read a text String held in fieldName in string
*/
public static String getTextString(final byte[] rawText, final boolean keepReturns) {
String returnText="";
//make sure encoding loaded
StandardFonts.checkLoaded(StandardFonts.PDF);
char[] chars=null;
if(rawText!=null) {
chars=new char[rawText.length*2];
}
int ii=0;
char nextChar;
final TextTokens rawChars=new TextTokens(rawText);
//test to see if unicode
if(rawChars.isUnicode()){
//its unicode
while(rawChars.hasMoreTokens()){
nextChar=rawChars.nextUnicodeToken();
//breask a file and does not appear used so removed 2013/5/20
if(nextChar==9 || (!keepReturns && (nextChar==10 || nextChar==13))){
chars[ii]=32;
ii++;
}else
if(nextChar>31 || (keepReturns && (nextChar==10 || nextChar==13))){
chars[ii]=nextChar;
ii++;
}
}
}else{
//pdfDoc encoding
while(rawChars.hasMoreTokens()){
nextChar=rawChars.nextToken();
String c = null;
if(nextChar==9 || (!keepReturns && (nextChar==10 || nextChar==13))){
c = " ";
}else if (keepReturns && (nextChar==10 || nextChar==13)){
c = String.valueOf( nextChar );
}else if(nextChar>31 && nextChar<253){
c=StandardFonts.getEncodedChar(StandardFonts.PDF,nextChar);
}
if ( c != null ){
final int len=c.length();
//resize if needed
if(ii+len>=chars.length){
final char[] tmp=new char[len+ii+10];
System.arraycopy(chars, 0, tmp, 0, chars.length);
chars=tmp;
}
//add values
for(int i=0;i= 0) || (ch > 126 && ch < 160) ) {
continue;
}
newString.append(ch);
}
return newString.toString();
}
}
// (c) 2015 - 2025 Weber Informatics LLC | Privacy Policy