Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
org.jpedal.parser.text.HexTextUtils Maven / Gradle / Ivy
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
@LICENSE@
*
* ---------------
* HexTextUtils.java
* ---------------
*/
package org.jpedal.parser.text;
import org.jpedal.fonts.CodeSpaceRange;
import org.jpedal.fonts.PdfFont;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.fonts.glyph.T1GlyphFactory;
import org.jpedal.parser.ParserOptions;
/**
 * Static helpers that read hexadecimal text values (the {@code <...>} form, closed by
 * {@code '>'}) from a byte stream and store the decoded result on a {@link GlyphData}.
 *
 * @author markee
 */
class HexTextUtils {

    /** Byte value of {@code '>'} (62), which closes a hexadecimal string. */
    private static final int HEX_STRING_END = '>';

    /** Upper limit on hex digits read for one code when matching code space ranges (4 bytes). */
    private static final int MAX_CODE_HEX_DIGITS = 8;

    /**
     * Reads one character code by accumulating hex digit pairs until the code space range
     * held by {@code glyphData} accepts the value, then stores the raw value, the
     * display/Unicode text and (when positive) the width on {@code glyphData}.
     *
     * <p>Reading stops when the code space range matches, when a {@code '>'} is found in
     * either digit position, after four bytes, or when fewer than two bytes remain in
     * {@code stream}.
     *
     * @param stream          bytes being parsed
     * @param i               index of the first hex digit of the code
     * @param glyphData       supplies the code space range and receives the decoded values
     * @param currentFontData font queried for its type and default widths
     * @param parserOptions   not used by this method
     * @return {@code i} plus the number of hex digits consumed, minus one (the caller
     *         advances by one itself)
     */
    static int getHexValueFromNonEmbedAdobeCMAP(final byte[] stream, final int i, final GlyphData glyphData, final PdfFont currentFontData, final ParserOptions parserOptions) {
        final CodeSpaceRange cmap = glyphData.getCodeSpaceRange();

        int digitsRead = 0;
        int code = 0;
        // stays at 1 only if the very first byte matched the code space range (or nothing was read)
        int attempts = 1;

        // both digits of the pair must be inside the stream before they are read
        while (digitsRead < MAX_CODE_HEX_DIGITS && i + digitsRead + 1 < stream.length) {
            int high = stream[i + digitsRead];
            int low = stream[i + digitsRead + 1];
            if (high == HEX_STRING_END || low == HEX_STRING_END) {
                break;
            }
            digitsRead += 2;

            high = (high | 32) % 39 - 9; // fast way to convert hex to int value
            low = (low | 32) % 39 - 9; // fast way to convert hex to int value
            code = (code << 8) | ((high << 4) | low);

            if (cmap.isInCodeSpaceRange(code, digitsRead / 2)) {
                break;
            }
            attempts++;
        }

        final int cid = cmap.cidMap[code];
        final int uni = cmap.uniMap[cid];
        final String text = String.valueOf((char) uni);

        glyphData.setRawInt(code);
        glyphData.setRawChar((char) code);
        glyphData.setDisplayValue(text);
        glyphData.setUnicodeValue(text);

        float actualWidth;
        if (attempts > 1) {
            actualWidth = currentFontData.getDefaultWidth(cid);
            if (actualWidth == -1) {
                actualWidth = currentFontData.getDefaultWidth(-1);
            }
        } else {
            actualWidth = -1;
            if (currentFontData.getFontType() == StandardFonts.CIDTYPE0 || currentFontData.getFontType() == StandardFonts.CIDTYPE2) {
                actualWidth = currentFontData.getDefaultWidth(cid);
                if (actualWidth == -1) {
                    // single-byte code with no explicit width: half of the font-wide default
                    actualWidth = currentFontData.getDefaultWidth(-1) / 2;
                }
            }
        }

        if (actualWidth > 0) {
            glyphData.setActualWidth(actualWidth);
        }

        return i + digitsRead - 1; // the parent code increasing by 1 so reduce 1 here
    }

    /**
     * Reads one hex-encoded value whose digit count comes from
     * {@link GlyphData#getCharSize()} and stores the decoded value on {@code glyphData}.
     *
     * <p>The bytes after {@code i} are scanned first to count how many further digits are
     * available: carriage returns and line feeds are skipped, and a {@code '>'} ends the
     * count early and sets the char size on {@code glyphData} to 2.
     *
     * @param stream          bytes being parsed
     * @param i               index of the first hex digit
     * @param glyphData       supplies the char size and receives the decoded values
     * @param currentFontData font used to map the value to display/Unicode text
     * @param parserOptions   controls whether the Unicode value is filled in
     * @return index of the last byte consumed (the position reached by the digit reader, minus one)
     */
    static int getHexValue(final byte[] stream, int i, final GlyphData glyphData, final PdfFont currentFontData, final ParserOptions parserOptions) {
        int extraDigits = 0;
        int scanStart = i;
        final int charSize = glyphData.getCharSize();

        // count the digits that follow the first one, never looking past the end of the stream
        for (int offset = 1; offset < charSize && scanStart + offset < stream.length; offset++) {
            final int next = stream[scanStart + offset];
            if (next == HEX_STRING_END) { //allow for less than 4 chars at end of stream (ie 6c>)
                glyphData.setCharSize(2);
                break;
            } else if (next == '\n' || next == '\r') { //avoid any returns
                scanStart++;
                offset--;
            } else {
                extraDigits++;
            }
        }

        i = getValue(extraDigits, stream, i, glyphData) - 1;
        return setValue(glyphData, glyphData.getPossibleValue(), i, currentFontData, parserOptions);
    }

    /**
     * Reads a CID value that may be one or two bytes long.
     *
     * <p>A one-byte value (two hex digits) is always read and stored first. Unless the font
     * has a CMap name together with a Unicode mapping for that value, the byte at {@code i}
     * is {@code '>'}, or the value is found in the font's char strings, a two-byte value
     * (four hex digits) is then read as well. The two-byte value is used when
     * {@code isDoubleBytes} returns 1 or the font has an embedded glyph for it.
     *
     * @param stream          bytes being parsed
     * @param i               index of the first hex digit
     * @param glyphData       receives the decoded values
     * @param currentFontData font queried for mappings and glyphs
     * @param parserOptions   controls whether the Unicode value is filled in
     * @return index of the last byte consumed for whichever value was chosen
     */
    static int getHexCIDValue(final byte[] stream, final int i, final GlyphData glyphData, final PdfFont currentFontData, final ParserOptions parserOptions) {
        //single value (we always read first time to see if double byte or single)
        final int oneByteEndPtr = getValue(1, stream, i, glyphData);
        final int val = glyphData.getPossibleValue();
        setValue(glyphData, val, i, currentFontData, parserOptions);

        //lazy init if needed
        if (StandardFonts.CMAP == null) {
            StandardFonts.readCMAP();
        }

        //also check if mapped in Charstring
        final boolean hasCharString = glyphData.getRawInt() > 0
                && currentFontData.CMapName != null
                && currentFontData.getFontType() == StandardFonts.CIDTYPE0
                && currentFontData.getGlyphData().getCharStrings().containsKey(String.valueOf(glyphData.getRawInt()));

        //cases where the single-byte value is kept without looking at a second byte
        final boolean keepSingleByte = (currentFontData.CMapName != null && currentFontData.getUnicodeMapping(glyphData.getRawInt()) != null)
                || stream[i] == HEX_STRING_END;

        if (!keepSingleByte && !hasCharString) { //not sure if really needed
            final int twoByteEndPtr = getValue(3, stream, i, glyphData);
            final char combinedVal = (char) glyphData.getPossibleValue();
            final int isDouble = currentFontData.isDoubleBytes(val, combinedVal & 255, false);

            //if the combined value has a glyph, assume a 4 byte CID value
            if (isDouble == 1 || currentFontData.glyphs.getEmbeddedGlyph(new T1GlyphFactory(false), null, null, combinedVal, "", -1, null) != null) {
                return setValue(glyphData, combinedVal, twoByteEndPtr - 1, currentFontData, parserOptions);
            }
        }

        return oneByteEndPtr - 1;
    }

    /**
     * Stores {@code val} on {@code glyphData} as raw int, raw char and display value, and
     * fills in the Unicode value when text is being extracted.
     *
     * <p>For a CID font that has a CMAP but no Unicode mapping for {@code val}, the raw
     * char and raw int are replaced by the first character of the display value.
     *
     * @param glyphData       receives the values
     * @param val             decoded character value
     * @param i               stream position, returned unchanged
     * @param currentFontData font used for glyph and Unicode lookups
     * @param parserOptions   controls whether the Unicode value is filled in
     * @return {@code i}
     */
    private static int setValue(final GlyphData glyphData, final int val, final int i, final PdfFont currentFontData, final ParserOptions parserOptions) {
        glyphData.setRawInt(val);
        glyphData.setRawChar((char) val);
        glyphData.setDisplayValue(currentFontData.getGlyphValue(val));

        final boolean cidWithoutUnicode = currentFontData.isCIDFont()
                && currentFontData.getCMAP() != null
                && currentFontData.getUnicodeMapping(val) == null;
        if (cidWithoutUnicode) {
            glyphData.setRawChar(glyphData.getDisplayValue().charAt(0));
            glyphData.setRawInt(glyphData.getRawChar());
        }

        if (parserOptions.isTextExtracted()) {
            glyphData.setUnicodeValue(currentFontData.getUnicodeValue(glyphData.getDisplayValue(), glyphData.getRawInt()));
        }

        return i;
    }

    /**
     * Reads {@code chars + 1} hex digits starting at {@code i}, skipping any byte that is
     * not a hex digit, and stores the combined value via
     * {@link GlyphData#setPossibleValue}.
     *
     * <p>Scanning stops at the end of {@code stream} even if fewer digits were found; the
     * digits read so far keep their positions and the missing ones count as zero.
     *
     * @param chars     number of digits to read, minus one
     * @param stream    bytes being parsed
     * @param i         index to start scanning from
     * @param glyphData receives the combined value
     * @return index just past the last byte examined
     */
    private static int getValue(final int chars, final byte[] stream, int i, final GlyphData glyphData) {
        int val = 0;
        int charsToFind = chars;

        while (charsToFind > -1 && i < stream.length) {
            // -1 for anything that is not 0-9, a-f or A-F; such bytes are ignored
            final int digit = Character.digit(stream[i], 16);
            if (digit > -1) {
                // shift amount comes from TD.multiply16 (presumably 4 bits per digit position - confirm in TD)
                val += (digit << TD.multiply16[charsToFind]);
                charsToFind--;
            }
            i++;
        }

        glyphData.setPossibleValue(val);
        return i;
    }
}