org.jpedal.utils.StringUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of OpenViewerFX Show documentation
Show all versions of OpenViewerFX Show documentation
An Open Source JavaFX PDF Viewer
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2015 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ---------------
* StringUtils.java
* ---------------
*/
package org.jpedal.utils;
import java.io.UnsupportedEncodingException;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.io.TextTokens;
import org.jpedal.parser.DecoderOptions;
public class StringUtils {
//Character codes used by the methods in this class. They are held as int constants
//(mostly decimal code points) rather than as char literals in the code itself.
private static final int ampersand = '&';
private static final int ampersandInt = 'A'; //replacement for '&' - use capital A as it is not an escaped char
private static final int aInt = 97; //'a'
private static final int zeroInt = 48; //'0'
private static final int nineInt = 57; //'9'
private static final int openSquareBracketInt = 91; //'['
private static final int closeSquareBracketInt = 93; //']'
private static final int openCurlyBracket = 40; //'(' - NOTE: despite the name, 40 is a round bracket
private static final int closeCurlyBracket = 41; //')' - NOTE: despite the name, 41 is a round bracket
private static final int backSlashInt = 92; //backslash
private static final int forwardSlashInt = 47; //'/'
private static final int hashInt = 35; //'#'
private static final int divideInt = 247; //division sign
private static final int fullStopInt = 46; //'.'
private static final int spaceInt = 32; //' '
private static final int percentInt = 37; //'%'
private static final int minusInt = 45; //'-'
private static final int underScoreInt = 95; //'_'
// private final static int backSlachInt = 92;
// private final static int nInt = 110;
// private final static int newLineInt = 10;
private static final int plusInt = 43; //'+'
private static final int pInt = 112; //'p'
private static final int colonInt = 58; //':'
private static final int equalsInt = 61; //'='
private static final int cInt = 99; //'c'
private static final int qInt = 113; //'q'
/**
 * Name of the character encoding picked when this class is loaded.
 * Always one of "UTF-8", "MacRoman" or "Cp1252".
 */
private static String enc;
static{
    final String platformEncoding=System.getProperty("file.encoding");

    //constant-first equals so a missing file.encoding property cannot throw a
    //NullPointerException (which would stop the whole class from loading)
    if("UTF-8".equals(platformEncoding) || "MacRoman".equals(platformEncoding) || "Cp1252".equals(platformEncoding)){
        //platform encoding is one we support directly
        enc=platformEncoding;
    }else if(DecoderOptions.isRunningOnMac) {
        enc="MacRoman";
    }else if(DecoderOptions.isRunningOnWindows) {
        enc="Cp1252";
    }else {
        enc="UTF-8";
    }
}
/**
* turn any hex values (ie #e4) into chars
* @param value
* @return
*/
public static final String convertHexChars(final String value) {
//avoid null
if(value==null) {
return value;
}
//find char
final int escapeChar=value.indexOf(hashInt);
if(escapeChar==-1) {
return value;
}
//process
final StringBuilder newString=new StringBuilder();
final int length=value.length();
//newString.setLength(length);
char c;
for(int ii=0;iilength) {
end=length;
}
final String key=value.substring(ii,end);
c=(char)Integer.parseInt(key,16);
ii++;
if(c!=spaceInt) {
newString.append(c);
}
}else {
newString.append(c);
}
}
return newString.toString();
}
/** check to see if the string contains anything other than
* '-' '0-9' '.'
* if so then its not a number.
*/
public static boolean isNumber(final String textString) {
final byte[] data=StringUtils.toBytes(textString);
final int strLength=data.length;
boolean isNumber=true;
//assume true and disprove
for(int j=0;j=zeroInt && data[j] <=nineInt)|| data[j]==fullStopInt
|| (j==0 && data[j]==minusInt)){ //assume and disprove
}else{
isNumber=false;
//exit loop
j=strLength;
}
}
return isNumber;
}
/**
 * Rewrites a name so that the whole name can be used in HTML.
 *
 * Every character is treated as follows:
 * <ul>
 * <li>{@code &} becomes {@code A}</li>
 * <li>space, {@code %}, {@code /} and backslash become {@code _}</li>
 * <li>{@code .} becomes {@code -}</li>
 * <li>{@code +} becomes {@code p}, {@code :} becomes {@code c}, {@code =} becomes {@code q}</li>
 * <li>{@code [}, {@code ]}, {@code #}, {@code (}, {@code )} and the division sign (247) are removed</li>
 * <li>anything else is kept</li>
 * </ul>
 * If the result starts with a digit, an {@code a} is put in front of it.
 *
 * NOTE: {@code &} is always replaced. Previously it was only replaced when the name also
 * contained one of the other substituted characters.
 *
 * @param name the raw name; may be null or empty
 * @return null or empty input unchanged, otherwise the safe name. The result is empty
 * if every character of the name had to be removed.
 */
public static String makeHTMLNameSafe(String name) {
    if(name==null || name.isEmpty()) {
        return name;
    }

    final int length=name.length();
    final StringBuilder safe=new StringBuilder(length+1);

    //NOTE: if you add any more please use the int constants declared at the top of the class
    for(int i=0;i<length;i++){
        final char ch=name.charAt(i);

        switch(ch){
            case ampersand:
                //images break if & is in a path
                safe.append((char)ampersandInt);
                break;
            case spaceInt:
            case percentInt:
            case forwardSlashInt:
            case backSlashInt:
                safe.append((char)underScoreInt);
                break;
            case fullStopInt:
                safe.append((char)minusInt);
                break;
            case plusInt:
                safe.append((char)pInt);
                break;
            case colonInt:
                safe.append((char)cInt);
                break;
            case equalsInt:
                safe.append((char)qInt);
                break;
            case openSquareBracketInt:
            case closeSquareBracketInt:
            case hashInt:
            case divideInt:
            case openCurlyBracket:
            case closeCurlyBracket:
                //no safe replacement so just drop it
                break;
            default:
                safe.append(ch);
                break;
        }
    }

    //a name must not start with a digit. Length is checked first because every
    //character may have been dropped above (ie "[]"), leaving nothing to look at.
    if(safe.length()>0){
        final char first=safe.charAt(0);
        if(first>=zeroInt && first<=nineInt){
            safe.insert(0,(char)aInt);
        }
    }

    return safe.toString();
}
/**
* Decodes the raw bytes of a text string into characters, tidying up control characters
* on the way.
*
* The bytes are wrapped in a TextTokens reader. If it reports the data as unicode, each
* unicode token is used as it is; otherwise each token is mapped through the
* StandardFonts.PDF encoding (one token may map to more than one character).
*
* In both cases a tab (9) becomes a space. Line feed (10) and carriage return (13) are
* kept if keepReturns is set and become a space if it is not. All other values of 31 or
* below are dropped. In the non-unicode case values of 253 and above are dropped as well.
*
* NOTE(review): the end of this method is not intact in this copy of the file (see the
* note at the innermost loop below), so the return statement cannot be seen here.
*
* @param rawText raw bytes of the text string
* @param keepReturns true to keep line feed/carriage return, false to turn them into spaces
* @return presumably the decoded text (returnText starts as an empty String) - confirm
* against an intact copy of the file
*/
public static String getTextString(final byte[] rawText, final boolean keepReturns) {
String returnText="";
//make sure encoding loaded
StandardFonts.checkLoaded(StandardFonts.PDF);
//output buffer, initially twice the input length; ii is the next free slot
//NOTE(review): stays null if rawText is null - presumably TextTokens then has no tokens, verify
char[] chars=null;
if(rawText!=null) {
chars=new char[rawText.length*2];
}
int ii=0;
char nextChar;
final TextTokens rawChars=new TextTokens(rawText);
//test to see if unicode
if(rawChars.isUnicode()){
//its unicode
while(rawChars.hasMoreTokens()){
nextChar=rawChars.nextUnicodeToken();
//breaks a file and does not appear used so removed 2013/5/20
//tab always becomes a space, returns become a space unless we keep them
if(nextChar==9 || (!keepReturns && (nextChar==10 || nextChar==13))){
chars[ii]=32;
ii++;
}else
//keep printable values (and returns if asked for), anything else is dropped
if(nextChar>31 || (keepReturns && (nextChar==10 || nextChar==13))){
chars[ii]=nextChar;
ii++;
}
}
}else{
//pdfDoc encoding
while(rawChars.hasMoreTokens()){
nextChar=rawChars.nextToken();
//text to add for this token, stays null if the token is to be dropped
String c = null;
if(nextChar==9 || (!keepReturns && (nextChar==10 || nextChar==13))){
c = " ";
}else if (keepReturns && (nextChar==10 || nextChar==13)){
c = String.valueOf( nextChar );
}else if(nextChar>31 && nextChar<253){
//map through the PDF encoding
c=StandardFonts.getEncodedChar(StandardFonts.PDF,nextChar);
}
if ( c != null ){
final int len=c.length();
//resize if needed
if(ii+len>=chars.length){
final char[] tmp=new char[len+ii+10];
System.arraycopy(chars, 0, tmp, 0, chars.length);
chars=tmp;
}
//add values
//NOTE(review): the next line looks truncated in this copy of the file. The loop header
//runs straight into a condition on 'ch', and 'ch' and 'newString' are not declared
//anywhere in this method, so the rest of this method and the start of a following
//method appear to be missing. Restore from an intact copy before relying on it.
for(int i=0;i= 0) || (ch > 126 && ch < 160) ) {
continue;
}
newString.append(ch);
}
return newString.toString();
}
}