All downloads are free. Search and download functionality uses the official Maven repository.

org.jpedal.parser.text.EscapedTextUtils Maven / Gradle / Ivy

There is a newer version: 7.15.25
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * EscapedTextUtils.java
 * ---------------
 */
package org.jpedal.parser.text;

import org.jpedal.fonts.PdfFont;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.parser.ParserOptions;
import org.jpedal.render.DynamicVectorRenderer;

/**
 * @author markee
 */
class EscapedTextUtils {

    /** Mask keeping only the low-order byte of a character code. */
    private static final int BYTE_MASK = 255;

    /** Longest run of digits that can form a single octal escape. */
    private static final int MAX_OCTAL_DIGITS = 3;

    /** Number of hex digits read after a 'u' escape. */
    private static final int UNICODE_ESCAPE_DIGITS = 4;

    /**
     * Decodes the escape sequence that follows a backslash in a text stream and stores
     * the resulting raw/display/unicode values in {@code glyphData}.
     * <p>
     * Three forms are handled: an octal escape of one to three digits (0-7), a 'u' followed
     * by four hex digits, and single-character escapes (n, b, t, r, f map onto their control
     * characters; any other character is taken literally).
     *
     * @param i               index of the backslash in {@code stream}
     * @param stream          raw bytes being parsed
     * @param glyphData       receives the decoded values (last char, raw char, raw int,
     *                        display value and, when text is extracted, unicode value)
     * @param currentFontData font used to map the character code onto glyph/unicode values
     * @param streamLength    number of usable bytes in {@code stream}
     * @param parserOptions   consulted to see whether unicode text is being extracted
     * @param current         renderer, consulted only to see whether output is HTML/SVG
     * @return index of the last byte consumed by the escape sequence
     */
    static int getEscapedValue(int i, final byte[] stream, final GlyphData glyphData, final PdfFont currentFontData,
                               final int streamLength, final ParserOptions parserOptions, final DynamicVectorRenderer current) {
        // step over the escape char ('\\'=92) onto the first escaped byte
        i++;

        glyphData.setLastChar(glyphData.getRawChar()); //update last char as escape

        if (i < streamLength && isOctalDigit(stream[i])) {

            // see how long the number is; every extra digit is bounds-checked on its own so
            // that a short escape right at the end of the data is still decoded as octal
            int numberCount = 1;
            while (numberCount < MAX_OCTAL_DIGITS
                    && (i + numberCount) < streamLength
                    && isOctalDigit(stream[i + numberCount])) {
                numberCount++;
            }

            // convert octal escape; three digits can reach 511, so keep the low byte only
            final int rawInt = TD.readEscapeValue(i, numberCount, 8, stream) & BYTE_MASK;
            i = i + numberCount - 1;

            glyphData.setRawChar((char) rawInt); //set to dummy value as may be / value

            glyphData.setRawInt(rawInt);

            applyGlyphValues(glyphData, currentFontData, parserOptions, rawInt);

            //allow for \134 (ie \\)
            if (rawInt == 92) { // '\\'=92
                glyphData.setRawChar((char) 120);
            }

        } else {

            int rawInt = stream[i] & BYTE_MASK;
            glyphData.setRawChar((char) rawInt);

            // 'u' plus four hex digits is converted to a char value, but only when all four
            // digits lie inside the stream; otherwise 'u' is handled as a literal below
            if (rawInt == 'u' && (i + UNICODE_ESCAPE_DIGITS) < streamLength) {

                rawInt = TD.readEscapeValue(i + 1, UNICODE_ESCAPE_DIGITS, 16, stream);
                i += UNICODE_ESCAPE_DIGITS;

                applyGlyphValues(glyphData, currentFontData, parserOptions, rawInt);

            } else {

                final int controlCode = toControlCode(rawInt);
                if (controlCode != rawInt) {
                    rawInt = controlCode;
                    glyphData.setRawChar((char) controlCode);
                }

                applyGlyphValues(glyphData, currentFontData, parserOptions, rawInt);

                final String displayValue = glyphData.getDisplayValue();
                if (!displayValue.isEmpty()) { //set raw char
                    glyphData.setRawChar(displayValue.charAt(0));
                }
            }

            glyphData.setRawInt(rawInt);
        }

        //fix for character wrong in some T1 fonts
        if (currentFontData.getFontType() == StandardFonts.TYPE1 && current.isHTMLorSVG()) {
            final String possAltValue = currentFontData.getMappedChar(glyphData.getRawInt(), true);
            // equalsIgnoreCase is already case-insensitive and returns false for a null argument,
            // so no lower-casing is needed and a missing unicode value cannot cause an exception
            if (possAltValue != null && possAltValue.length() == 1
                    && possAltValue.equalsIgnoreCase(glyphData.getUnicodeValue())) {
                glyphData.set(possAltValue);
            }
        }
        return i;
    }

    /** Returns true when the byte is an ASCII octal digit ('0' to '7'). */
    private static boolean isOctalDigit(final byte value) {
        return value >= '0' && value <= '7';
    }

    /** Maps the escape letters n, b, t, r and f onto their control characters; other codes pass through unchanged. */
    private static int toControlCode(final int code) {
        switch (code) {
            case 'n':
                return '\n';
            case 'b':
                return '\b';
            case 't':
                return '\t';
            case 'r':
                return '\r';
            case 'f':
                return '\f';
            default:
                return code;
        }
    }

    /** Stores the font's glyph value for the code and, when text is being extracted, its unicode value. */
    private static void applyGlyphValues(final GlyphData glyphData, final PdfFont currentFontData,
                                         final ParserOptions parserOptions, final int rawInt) {
        glyphData.setDisplayValue(currentFontData.getGlyphValue(rawInt));

        if (parserOptions.isTextExtracted()) {
            glyphData.setUnicodeValue(currentFontData.getUnicodeValue(glyphData.getDisplayValue(), rawInt));
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy