All downloads are free. Search and download functionality uses the official Maven repository.

org.jpedal.utils.StringUtils Maven / Gradle / Ivy

There is a newer version: 20151002
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2016 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * StringUtils.java
 * ---------------
 */
package org.jpedal.utils;

import java.io.UnsupportedEncodingException;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.io.TextTokens;
import org.jpedal.parser.DecoderOptions;

public class StringUtils {
    
    //Character codes used by this class, held as int values.
    //The trailing comment on each line names the character the code stands for.
    private static final int ampersand = '&';
    private static final int ampersandInt = 'A'; //NOTE: despite the name this is a capital 'A' - used in place of '&' as it needs no escaping
    private static final int aInt = 97; //'a'
    private static final int zeroInt = 48; //'0'
    private static final int nineInt = 57; //'9'
    private static final int openSquareBracketInt = 91; //'['
    private static final int closeSquareBracketInt = 93; //']'
    private static final int openCurlyBracket = 40; //NOTE: 40 is '(' - a round bracket, not a curly one
    private static final int closeCurlyBracket = 41; //NOTE: 41 is ')' - a round bracket, not a curly one
    private static final int backSlashInt = 92; //backslash
    private static final int forwardSlashInt = 47; //'/'
    private static final int hashInt = 35; //'#'
    private static final int divideInt = 247; //division sign (U+00F7)
    private static final int fullStopInt = 46; //'.'
    private static final int spaceInt = 32; //' '
    private static final int percentInt = 37; //'%'
    private static final int minusInt = 45; //'-'
    private static final int underScoreInt = 95; //'_'
//    private final static int backSlachInt = 92;
//    private final static int nInt = 110;
//    private final static int newLineInt = 10;
private static final int plusInt = 43; //'+'
    private static final int pInt = 112; //'p'
    private static final int colonInt = 58; //':'
    private static final int equalsInt = 61; //'='
    private static final int cInt = 99; //'c'
    private static final int qInt = 113; //'q'
    
    /** Name of the character encoding chosen when this class is initialised. */
    private static String enc;
    
    /*
     * Keep the JVM's file.encoding when it is UTF-8, MacRoman or Cp1252. For any
     * other value pick a default from the platform flags held in DecoderOptions:
     * MacRoman on Mac, Cp1252 on Windows, otherwise UTF-8.
     */
    static{
        final String jvmEncoding=System.getProperty("file.encoding");
        
        final boolean usable=jvmEncoding.equals("UTF-8")
                || jvmEncoding.equals("MacRoman")
                || jvmEncoding.equals("Cp1252");
        
        if(usable){
            enc=jvmEncoding;
        }else if(DecoderOptions.isRunningOnMac){
            enc="MacRoman";
        }else if(DecoderOptions.isRunningOnWindows){
            enc="Cp1252";
        }else{
            enc="UTF-8";
        }
    }

    /**
     * turn any hex values (ie #e4) into chars
     * @param value
     * @return
     */
    public static final String convertHexChars(final String value) {
        
        //avoid null
        if(value==null) {
            return value;
        }
        
        //find char
        final int escapeChar=value.indexOf(hashInt);
        
        if(escapeChar==-1) {
            return value;
        }
        
        //process
        final StringBuilder newString=new StringBuilder();
        final int length=value.length();
        //newString.setLength(length);
        
        char c;
        
        for(int ii=0;iilength) {
                    end=length;
                }
                final String key=value.substring(ii,end);
                
                c=(char)Integer.parseInt(key,16);
                
                ii++;
                
                if(c!=spaceInt) {
                    newString.append(c);
                }
            }else {
                newString.append(c);
            }
            
            
        }
        
        return newString.toString();
    }
    
    /** check to see if the string contains anything other than
     * '-' '0-9' '.'
     * if so then its not a number.
     */
    public static boolean isNumber(final String textString) {
        final byte[] data=StringUtils.toBytes(textString);
        final int strLength=data.length;
        boolean isNumber=true;
        
        //assume true and disprove
        for(int j=0;j=zeroInt && data[j] <=nineInt)|| data[j]==fullStopInt
                    || (j==0 && data[j]==minusInt)){ //assume and disprove
            }else{
                isNumber=false;
                //exit loop
                j=strLength;
            }
        }
        
        return isNumber;
    }
    
    /**
     * Rewrites a name so that it can be used in HTML.
     * <p>
     * Substitutions: '&amp;' becomes 'A'; space, '%', '/' and backslash become '_';
     * '.' becomes '-'; '+' becomes 'p'; ':' becomes 'c'; '=' becomes 'q'.
     * Removed outright: '[', ']', '#', '(', ')' and the division sign (U+00F7).
     * If the result starts with a digit it is prefixed with 'a'.
     * <p>
     * A name made up only of removed characters gives an empty string.
     *
     * @param name the raw name, may be null or empty
     * @return the safe name, or name itself when it is null or empty
     */
    public static String makeHTMLNameSafe(String name) {
        
        if(name==null || name.isEmpty()) {
            return name;
        }
        
        final int length=name.length();
        
        //one spare slot for the possible 'a' prefix
        final StringBuilder safe=new StringBuilder(length+1);
        
        //single pass: substitute or drop each char. The substitutes are never in the
        //dropped set so this matches doing the substitutions first and the removal second.
        //Only ASCII literals are used here, and the one non-ASCII char is written as a
        //Unicode escape, so the encoding of this source file does not matter.
        for(int i=0;i<length;i++){
            final char ch=name.charAt(i);
            
            switch(ch){
                
                //'&' breaks image paths so swap for a char which needs no escaping
                case '&':
                    safe.append('A');
                    break;
                    
                case ' ':
                case '%':
                case '/':
                case '\\':
                    safe.append('_');
                    break;
                    
                case '.':
                    safe.append('-');
                    break;
                    
                case '+':
                    safe.append('p');
                    break;
                    
                case ':':
                    safe.append('c');
                    break;
                    
                case '=':
                    safe.append('q');
                    break;
                    
                //these chars are removed altogether
                case '[':
                case ']':
                case '#':
                case '(':
                case ')':
                case '\u00F7':
                    break;
                    
                default:
                    safe.append(ch);
                    break;
            }
        }
        
        //a leading digit gets an 'a' prefix (nothing to check if every char was removed)
        if(safe.length()>0 && safe.charAt(0)>='0' && safe.charAt(0)<='9'){
            safe.insert(0,'a');
        }
        
        name=safe.toString();
        
        return name;
    }

    /**
     * Decodes the raw bytes of a text string into characters.
     * <p>
     * The bytes are wrapped in a TextTokens. When it reports Unicode the tokens are
     * read with nextUnicodeToken, otherwise each token is translated through the
     * StandardFonts.PDF encoding. In both cases a tab (9) becomes a space, line feed
     * (10) and carriage return (13) become a space unless keepReturns is set (then
     * they are kept), and other values below 32 are left out.
     *
     * @param rawText raw bytes to decode
     * @param keepReturns true to keep line feed and carriage return rather than
     *                    turning them into spaces
     * @return NOTE(review): the return statement is missing from this copy of the file - confirm
     */
    public static String getTextString(final byte[] rawText, final boolean keepReturns) {
        
        String returnText="";
        
        //make sure encoding loaded
        StandardFonts.checkLoaded(StandardFonts.PDF);
        
        //output buffer - two chars allowed per input byte
        //NOTE(review): stays null when rawText is null, which is only safe if TextTokens
        //reports no tokens for a null input - confirm
        char[] chars=null;
        if(rawText!=null) {
            chars=new char[rawText.length*2];
        }
        int ii=0; //next free slot in chars
        char nextChar;
        
        final TextTokens rawChars=new TextTokens(rawText);
        
        //test to see if unicode
        if(rawChars.isUnicode()){
            //its unicode
            while(rawChars.hasMoreTokens()){
                nextChar=rawChars.nextUnicodeToken(keepReturns);
                
                //breaks a file and does not appear used so removed 2013/5/20
                //tab, and returns we are not keeping, become a space (32)
                if(nextChar==9 || (!keepReturns && (nextChar==10 || nextChar==13))){
                    chars[ii]=32;
                    ii++;
                }else 
                //printable chars, and returns we are keeping, are copied as they are
                if(nextChar>31 || (keepReturns && (nextChar==10 || nextChar==13))){
                    chars[ii]=nextChar;
                    ii++;
                }
            }
            
        }else{
            //pdfDoc encoding
            
            while(rawChars.hasMoreTokens()){
                nextChar=rawChars.nextToken();
                
                //text to add for this token - stays null if the token is to be left out
                String c = null;
               if(nextChar==9 || (!keepReturns && (nextChar==10 || nextChar==13))){
                    c = " ";
                }else if (keepReturns && (nextChar==10 || nextChar==13)){
                    c = String.valueOf( nextChar );
                }else if(nextChar>31 && nextChar<253){
                    //values 32-252 are looked up in the PDF encoding table
                    c=StandardFonts.getEncodedChar(StandardFonts.PDF,nextChar);
                }
                
                if ( c != null ){
                    final int len=c.length();
                    
                    //resize if needed
                    if(ii+len>=chars.length){
                        final char[] tmp=new char[len+ii+10];
                        System.arraycopy(chars, 0, tmp, 0, chars.length);
                        chars=tmp;
                    }
                    
                    //add values
                    //NOTE(review): the text of this file is corrupted on the next line - the rest of
                    //this loop, the end of this method and the start of the method which follows
                    //it are missing and need restoring from the original source
                    for(int i=0;i= 0) || (ch > 126 && ch < 160) ) {
                continue;
            }
            newString.append(ch);
        }
        return newString.toString();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy