All downloads are free. The search and download functionality uses the official Maven repository.

org.jpedal.parser.text.HexTextUtils Maven / Gradle / Ivy

There is a newer version: 7.15.25
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * HexTextUtils.java
 * ---------------
 */
package org.jpedal.parser.text;

import org.jpedal.fonts.CodeSpaceRange;
import org.jpedal.fonts.PdfFont;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.fonts.glyph.T1GlyphFactory;
import org.jpedal.parser.ParserOptions;

/**
 * Static helpers that decode one character from a hexadecimal text string
 * (ASCII hex digits terminated by {@code '>'}) held in a byte stream, and
 * store the decoded value on a {@link GlyphData}.
 *
 * <p>Every entry point returns the stream index of the last byte it consumed
 * (one less than the next unread byte), because the calling code advances the
 * index by one itself.</p>
 *
 * @author markee
 */
class HexTextUtils {

	/** ASCII code of {@code '>'}, which terminates a hex string. */
	private static final int HEX_STRING_END = 62;

	/** Maximum number of hex digits (4 bytes) read when matching a code against a code space range. */
	private static final int MAX_CMAP_HEX_DIGITS = 8;

	/**
	 * Decodes one character code whose byte length is decided by the code space
	 * range held on {@code glyphData}.
	 *
	 * <p>Hex digits are consumed two at a time (one byte), up to 4 bytes, until
	 * the accumulated value falls inside the code space range or {@code '>'} is
	 * seen in either digit position. The raw value, raw char, display value and
	 * unicode value of {@code glyphData} are then set, together with the actual
	 * width when a positive one can be obtained from the font.</p>
	 *
	 * @param stream          bytes containing the hex string
	 * @param i               index of the first hex digit to read
	 * @param glyphData       receives the decoded values; also supplies the code space range
	 * @param currentFontData font used to look up default widths
	 * @param parserOptions   not used by this method
	 * @return index of the last byte consumed ({@code i - 1} if nothing was consumed)
	 */
	static int getHexValueFromNonEmbedAdobeCMAP(final byte[] stream, final int i, final GlyphData glyphData, final PdfFont currentFontData, final ParserOptions parserOptions) {
		final CodeSpaceRange cmap = glyphData.getCodeSpaceRange();
		int digitsRead = 0;
		int code = 0;
		//true once a whole byte has been read without the code landing in the code space range
		boolean sawUnmatchedByte = false;

		while (digitsRead < MAX_CMAP_HEX_DIGITS) {
			final int high = stream[i + digitsRead];
			final int low = stream[i + digitsRead + 1];
			if (high == HEX_STRING_END || low == HEX_STRING_END) {
				break;
			}
			digitsRead += 2;
			code = (code << 8) | ((fastHexDigit(high) << 4) | fastHexDigit(low));
			if (cmap.isInCodeSpaceRange(code, digitsRead / 2)) {
				break;
			}
			sawUnmatchedByte = true;
		}

		final int uni = cmap.getEncoding().getUnicodeValue(code);
		final String unicode = String.valueOf((char) uni);
		glyphData.setRawInt(code);
		glyphData.setRawChar((char) code);
		glyphData.setDisplayValue(unicode);
		glyphData.setUnicodeValue(unicode);

		float actualWidth;
		if (sawUnmatchedByte) {
			actualWidth = currentFontData.getDefaultWidth(glyphData.getRawInt());
			if (actualWidth == -1) {
				actualWidth = currentFontData.getDefaultWidth(-1);
			}
		} else {
			actualWidth = -1;
			if ((currentFontData.getFontType() == StandardFonts.CIDTYPE0 || currentFontData.getFontType() == StandardFonts.CIDTYPE2)) {
				actualWidth = currentFontData.getDefaultWidth(glyphData.getRawInt());
				if (actualWidth == -1) {
					//fall back to half of the font-wide default
					actualWidth = currentFontData.getDefaultWidth(-1) / 2;
				}
			}
		}
		if (actualWidth > 0) {
			glyphData.setActualWidth(actualWidth);
		}
		return i + digitsRead - 1; // the parent code increasing by 1 so reduce 1 here
	}

	/**
	 * Decodes one character of {@code glyphData.getCharSize()} hex digits starting
	 * at {@code stream[i]} and stores it on {@code glyphData}.
	 *
	 * <p>If {@code '>'} is met before the expected number of digits has been seen
	 * (for example {@code 6c>}), only the digits found are decoded and the char
	 * size on {@code glyphData} is reset to 2. White-space bytes between digits
	 * are skipped rather than counted as digits.</p>
	 *
	 * @param stream          bytes containing the hex string
	 * @param i               index of the first hex digit of the character
	 * @param glyphData       supplies the char size and receives the decoded values
	 * @param currentFontData font used to map the raw value to display/unicode values
	 * @param parserOptions   controls whether the unicode value is resolved
	 * @return index of the last byte consumed
	 */
	static int getHexValue(final byte[] stream, int i, final GlyphData glyphData, final PdfFont currentFontData, final ParserOptions parserOptions) {
		//'<'=60

		int extraDigits = 0; //digits expected after the first one
		int skipped = 0; //white-space bytes stepped over so far

		final int charSize = glyphData.getCharSize();
		//get number of chars
		for (int pos = 1; pos < charSize; pos++) {
			final int next = stream[i + skipped + pos];

			if (next == HEX_STRING_END) { //allow for less than 4 chars at end of stream (ie 6c>)
				glyphData.setCharSize(2);
				break;
			} else if (isPdfWhiteSpace(next)) { //white space inside a hex string is not a digit
				skipped++;
				pos--;
			} else {
				extraDigits++;
			}
		}

		i = getValue(extraDigits, stream, i, glyphData) - 1;

		return setValue(glyphData, glyphData.getPossibleValue(), i, currentFontData, parserOptions);
	}

	/**
	 * Decodes one CID character that may be written as either 2 or 4 hex digits.
	 *
	 * <p>The 2 digit value is always read and stored first. Unless that value
	 * already has a unicode mapping (with a CMap name set), the string ends at
	 * {@code stream[i]}, or the value is present in the font's char strings, the
	 * 4 digit value is also read; it is used instead when the font reports the
	 * pair as double byte or has an embedded glyph for the combined value.</p>
	 *
	 * @param stream          bytes containing the hex string
	 * @param i               index of the first hex digit of the character
	 * @param glyphData       receives the decoded values
	 * @param currentFontData font queried for mappings and glyphs
	 * @param parserOptions   controls whether the unicode value is resolved
	 * @return index of the last byte consumed for whichever width was chosen
	 */
	static int getHexCIDValue(final byte[] stream, final int i, final GlyphData glyphData, final PdfFont currentFontData, final ParserOptions parserOptions) {

		//'<'=60
		//single value (we always read this first to see if double byte or single)
		final int oneByteEndPtr = getValue(1, stream, i, glyphData);
		final int val = glyphData.getPossibleValue();

		setValue(glyphData, val, i, currentFontData, parserOptions);

		//lazy init if needed
		if (StandardFonts.CMAP == null) {
			StandardFonts.readCMAP();
		}

		//also check if mapped in Charstring
		final boolean hasCharString = glyphData.getRawInt() > 0 && currentFontData.CMapName != null && currentFontData.getFontType() == StandardFonts.CIDTYPE0 && currentFontData.getGlyphData().getCharStrings().containsKey(String.valueOf(glyphData.getRawInt()));

		//cases where the single byte value is kept without looking any further
		final boolean keepSingleByte = currentFontData.CMapName != null && currentFontData.getUnicodeMapping(glyphData.getRawInt()) != null || stream[i] == '>';

		if (!keepSingleByte && !hasCharString) { //not sure if really needed

			final int twoByteEndPtr = getValue(3, stream, i, glyphData);

			final char combinedVal = (char) glyphData.getPossibleValue();

			final int isDouble = currentFontData.isDoubleBytes(val, combinedVal & 255, false);

			//if the combined value has a glyph, assume a 4 hex digit (two byte) CID value
			if (isDouble == 1 || currentFontData.glyphs.getEmbeddedGlyph(new T1GlyphFactory(false), null, null, combinedVal, "", -1, null) != null) {
				return setValue(glyphData, combinedVal, twoByteEndPtr - 1, currentFontData, parserOptions);
			}
		}

		return oneByteEndPtr - 1;
	}

	/**
	 * Stores {@code val} on {@code glyphData} as raw int and raw char, together
	 * with the display value the font returns for it.
	 *
	 * <p>For a CID font that has a CMAP but no unicode mapping for {@code val},
	 * the raw char and raw int are replaced by the first character of the display
	 * value. The unicode value is only resolved when text is being extracted.</p>
	 *
	 * @param glyphData       receives the values
	 * @param val             decoded character code
	 * @param i               stream index, returned unchanged
	 * @param currentFontData font used for the lookups
	 * @param parserOptions   supplies the text-extraction flag
	 * @return {@code i}
	 */
	private static int setValue(final GlyphData glyphData, final int val, final int i, final PdfFont currentFontData, final ParserOptions parserOptions) {

		glyphData.setRawInt(val);
		glyphData.setRawChar((char) val);
		glyphData.setDisplayValue(currentFontData.getGlyphValue(val));

		final boolean remapFromDisplayValue = currentFontData.isCIDFont() && currentFontData.getCMAP() != null && currentFontData.getUnicodeMapping(val) == null;
		if (remapFromDisplayValue) {
			//NOTE(review): assumes getGlyphValue never returns null or "" here - confirm
			glyphData.setRawChar(glyphData.getDisplayValue().charAt(0));
			glyphData.setRawInt(glyphData.getRawChar());
		}

		if (parserOptions.isTextExtracted()) {
			glyphData.setUnicodeValue(currentFontData.getUnicodeValue(glyphData.getDisplayValue(), glyphData.getRawInt()));
		}

		return i;
	}

	/**
	 * Reads {@code chars + 1} hex digits starting at {@code stream[i]} and stores
	 * the combined value via {@link GlyphData#setPossibleValue}.
	 *
	 * <p>Bytes that are not hex digits are stepped over without being counted,
	 * so reading continues (including past {@code '>'}) until enough digits have
	 * been found. No bounds check is made on {@code stream}.</p>
	 *
	 * @param chars     number of digits to read after the first (so 1 reads 2 digits, 3 reads 4)
	 * @param stream    bytes containing the hex string
	 * @param i         index to start reading from
	 * @param glyphData receives the value
	 * @return index of the byte following the last one examined
	 */
	private static int getValue(final int chars, final byte[] stream, int i, final GlyphData glyphData) {

		int val = 0;
		int charsToFind = chars;

		while (charsToFind > -1) {

			final int digit = hexDigitValue(stream[i]);

			if (digit > -1) {
				//TD.multiply16 is defined elsewhere; presumably the bit shift for each digit position - confirm
				val += (digit << TD.multiply16[charsToFind]);
				charsToFind--;
			}

			i++;
		}

		glyphData.setPossibleValue(val);

		return i;
	}

	/**
	 * Converts an ASCII hex digit to its value.
	 *
	 * @param c byte value to convert
	 * @return 0-15 for {@code 0-9}, {@code a-f} or {@code A-F}; -1 for anything else
	 */
	private static int hexDigitValue(final int c) {
		if (c >= 'A' && c <= 'F') {
			return c - 55;
		} else if (c >= 'a' && c <= 'f') {
			return c - 87;
		} else if (c >= '0' && c <= '9') {
			return c - 48;
		}
		return -1; //ignore 'bum' values
	}

	/**
	 * Fast way to convert a hex digit to its int value. Only meaningful for
	 * {@code 0-9}, {@code a-f} and {@code A-F}; other input is not detected.
	 *
	 * @param c ASCII code of a hex digit
	 * @return value of the digit (0-15) for valid input
	 */
	private static int fastHexDigit(final int c) {
		return (c | 32) % 39 - 9;
	}

	/**
	 * Tests for the bytes treated as white space inside a hex string: NUL, tab,
	 * line feed, form feed, carriage return and space.
	 *
	 * @param c byte value to test
	 * @return true if {@code c} is one of those six values
	 */
	private static boolean isPdfWhiteSpace(final int c) {
		return c == 0 || c == 9 || c == 10 || c == 12 || c == 13 || c == 32;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy