// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ---------------
* StructuredContentHandler.java
* ---------------
*/
package org.jpedal.objects.structuredtext;
import java.util.HashMap;
import java.util.Map;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.render.DynamicVectorRenderer;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
/**
 * Collects the text found inside marked-content sequences (the BMC / BDC ... EMC
 * calls made on this class) and stores it in one of two ways, chosen by the type
 * of object handed to the constructor:
 * <ul>
 * <li><b>lookup mode</b> ({@code buildDirectly == false}) - a {@link Map} was supplied;
 * on each {@link #EMC()} the collected text is stored in it under the MCID of the
 * dictionary that opened the sequence.</li>
 * <li><b>direct mode</b> ({@code buildDirectly == true}) - a DOM {@link Document} was
 * supplied; a {@code TaggedPDF-doc} root element is created and elements/text nodes
 * are appended to it as sequences are opened and closed.</li>
 * </ul>
 * Package-private members are left visible for other classes in this package.
 */
public class StructuredContentHandler {

    /** Compile-time switch: set to true to trace every call on System.out. */
    private static final boolean debug = false;

    /**
     * Tag names passed to BMC, keyed by the nesting level at which the sequence
     * was opened (i.e. the level BEFORE it is incremented). Only written in this class.
     */
    private final Map<Integer, String> markedContentProperties;

    /** Current nesting depth of marked content; 0 means outside any sequence. */
    int markedContentLevel;

    /** Text collected so far for the current marked-content sequence. */
    private StringBuffer markedContentSequence;

    /** Becomes true once {@link #setEMCValues()} has run at least once. */
    private boolean contentExtracted;

    /** Key resolved by {@link #setEMCValues()} for the sequence being closed. */
    private String currentKey;

    /**
     * Label for each nesting level (the level AFTER it is incremented): the MCID as a
     * String for BDC, or the tag name for BMC. Entries are never removed in this class.
     */
    final Map<Integer, String> keys;

    /** Dictionary passed to BDC, keyed by the nesting level as a String. */
    final Map<String, PdfObject> dictionaries;

    /** Lookup mode only: caller-supplied map receiving MCID (as String) -> text. */
    Map values;

    /** True when writing straight into a DOM tree, false when filling {@link #values}. */
    final boolean buildDirectly;

    /** Not used in this class. */
    DynamicVectorRenderer current;

    /** Direct mode only: document that owns every node created here. */
    Document doc;

    /** Direct mode only: element new nodes are currently appended to. */
    Element root;

    /** When true, {@link #setBMCvalues(String)} does not reset the text at level 0. */
    boolean isHTML;

    /**
     * Selects the output mode from the runtime type of the argument.
     *
     * @param markedContent a {@link Map} (lookup mode) or a {@link Document} (direct
     *                      mode); anything else fails the cast to {@code Document}
     */
    public StructuredContentHandler(final Object markedContent) {
        //build either tree or lookup table
        if (markedContent instanceof Map) {
            buildDirectly = false;
            values = (Map) markedContent;
        } else {
            buildDirectly = true;
            doc = (Document) markedContent;
            root = doc.createElement("TaggedPDF-doc");
            doc.appendChild(root);
        }
        if (debug) {
            System.out.println("BuildDirectly=" + buildDirectly);
        }
        markedContentProperties = new HashMap<Integer, String>();
        markedContentLevel = 0;
        markedContentSequence = new StringBuffer();
        currentKey = "";
        keys = new HashMap<Integer, String>();
        dictionaries = new HashMap<String, PdfObject>();
    }

    /**
     * Handles a DP call. Nothing is stored; the object is only traced when
     * {@code debug} is enabled.
     *
     * @param BDCobj dictionary supplied with the call
     */
    public void DP(final PdfObject BDCobj) {
        if (debug) {
            System.out.println("DP----------------------------------------------------------" + markedContentLevel);
            System.out.println(BDCobj);
            System.out.println("BDCobj=" + BDCobj);
        }
    }

    /**
     * Opens a marked-content sequence that carries a dictionary.
     * Increments the nesting level, remembers the dictionary for that level and,
     * when the dictionary has an MCID other than -1, records it as the level's key.
     *
     * @param BDCobj dictionary for the sequence; in direct mode its MCID is
     *               overwritten with -1
     */
    public void BDC(final PdfObject BDCobj) {
        //if start of sequence, reinitialise settings
        if (markedContentLevel == 0) {
            markedContentSequence = new StringBuffer();
        }
        markedContentLevel++;
        //MCID is only meaningful in lookup mode, so blank it in direct mode
        if (buildDirectly) {
            BDCobj.setIntNumber(PdfDictionary.MCID, -1);
        }
        final int MCID = BDCobj.getInt(PdfDictionary.MCID);
        //save key
        if (MCID != -1) {
            keys.put(markedContentLevel, String.valueOf(MCID));
        }
        dictionaries.put(String.valueOf(markedContentLevel), BDCobj);
        if (debug) {
            System.out.println("BDC----------------------------------------------------------" + markedContentLevel + " MCID=" + MCID);
            System.out.println("BDCobj=" + BDCobj);
        }
    }

    /**
     * Opens a marked-content sequence identified only by a tag name.
     * In direct mode the first element with that tag name below the current root is
     * reused, or a new one is created and appended, and it becomes the current root.
     *
     * @param op tag name, with or without a leading '/'
     */
    public void BMC(String op) {
        op = setBMCvalues(op);
        if (buildDirectly) {
            //add node with name for BMC
            if (op != null) {
                Element newRoot = (Element) root.getElementsByTagName(op).item(0);
                if (newRoot == null) {
                    newRoot = doc.createElement(op);
                    root.appendChild(newRoot);
                }
                root = newRoot;
            }
        }
    }

    /**
     * Book-keeping shared by every BMC call: strips a leading '/', resets the collected
     * text when at level 0 (unless {@link #isHTML} is set), increments the nesting
     * level and records the tag name as the key for the new level.
     *
     * @param op raw tag name
     * @return the tag name without its leading '/'
     */
    String setBMCvalues(String op) {
        //strip off /
        if (op.startsWith("/")) {
            op = op.substring(1);
        }
        //if start of sequence, reinitialise settings
        if (markedContentLevel == 0 && !isHTML) {
            markedContentSequence = new StringBuffer();
        }
        markedContentProperties.put(markedContentLevel, op);
        markedContentLevel++;
        if (debug) {
            System.out.println("BMC----------------------------------------------------------level=" + markedContentLevel + " raw op=" + op);
        }
        //save label and any dictionary
        keys.put(markedContentLevel, op);
        return op;
    }

    /**
     * Closes the current marked-content sequence: writes the collected text to the DOM
     * tree (direct mode) or to the lookup map (lookup mode), then decrements the
     * nesting level, never going below 0.
     */
    public void EMC() {
        setEMCValues();
        if (buildDirectly) {
            appendSequenceToTree();
        } else {
            storeSequenceInLookup();
        }
        if (markedContentLevel > 0) {
            markedContentLevel--;
        }
        if (debug) {
            System.out.println("EMC----------------------------------------------------------" + markedContentLevel);
        }
    }

    /**
     * Direct mode: appends the collected text to the DOM tree and resets it.
     */
    private void appendSequenceToTree() {
        // NOTE(review): dictionaries is keyed by the level as a String, while currentKey
        // may be a label left in keys by an earlier BMC at this level - confirm that
        // treating such a sequence as BMC is intended.
        final PdfObject BDCobj = dictionaries.get(currentKey);
        final boolean isBMC = (BDCobj == null);
        if (debug) {
            System.out.println(isBMC + " " + currentKey + ' ' + BDCobj + " markedContentSequence=" + markedContentSequence);
        }
        if (isBMC) {
            if (currentKey != null) {
                //text goes into the element opened by BMC, then step back up one level
                final Node child = doc.createTextNode(stripEscapeChars(markedContentSequence.toString()));
                root.appendChild(child);
                final Node oldRoot = root.getParentNode();
                //the parent of the top element is not an Element, so root stays put there
                if (oldRoot instanceof Element) {
                    root = (Element) oldRoot;
                }
            }
        } else {
            //sequence opened with a dictionary: wrap the text in a <p> element
            final Element tag = doc.createElement("p");
            root.appendChild(tag);
            final Node child = doc.createTextNode(markedContentSequence.toString());
            tag.appendChild(child);
        }
        //reset
        markedContentSequence = new StringBuffer();
    }

    /**
     * Lookup mode: stores the collected text under the MCID of the dictionary recorded
     * for the current level (if it has one) and forgets that dictionary.
     */
    private void storeSequenceInLookup() {
        final String contentSequence = markedContentSequence.toString();
        final String levelKey = String.valueOf(markedContentLevel);
        if (debug) {
            System.out.println("write out " + currentKey + " text=" + markedContentSequence + '<');
        }
        final PdfObject BDCobj = dictionaries.get(levelKey);
        int MCID = -1;
        if (BDCobj != null) {
            MCID = BDCobj.getInt(PdfDictionary.MCID);
        }
        //text is only stored, and only reset, when there is an MCID to file it under
        if (MCID != -1) {
            //values is supplied by the caller as a raw Map; this class only puts Strings
            @SuppressWarnings("unchecked")
            final Map<String, String> lookup = values;
            lookup.put(String.valueOf(MCID), contentSequence);
            markedContentSequence = new StringBuffer();
        }
        //remove used dictionary
        dictionaries.remove(levelKey);
    }

    /**
     * Book-keeping shared by every EMC call: flags that content has been seen and
     * resolves {@link #currentKey} for the current level, falling back to the level
     * itself (as a String) when no key was recorded.
     */
    void setEMCValues() {
        //set flag to show some content
        contentExtracted = true;
        currentKey = keys.get(markedContentLevel);
        //if no key recorded use current level as key
        if (currentKey == null) {
            currentKey = String.valueOf(markedContentLevel);
        }
        if (debug) {
            System.out.println("currentKey=" + currentKey + ' ' + keys);
        }
    }

    /**
     * Adds text to the current marked-content sequence. When nothing has been
     * collected yet, a single leading space is dropped from the new text.
     * The supplied buffer is copied, never kept or modified.
     *
     * @param current_value text to add
     * @param x1 unused
     * @param y1 unused
     * @param x2 unused
     * @param y2 unused
     */
    public void setText(final StringBuffer current_value, final float x1, final float y1, final float x2, final float y2) {
        if (markedContentSequence.length() == 0) {
            //copy so later appends and the trim below never touch the caller's buffer
            markedContentSequence = new StringBuffer(current_value);
            //lose space at start
            if (markedContentSequence.length() > 0 && markedContentSequence.charAt(0) == ' ') {
                markedContentSequence.deleteCharAt(0);
            }
        } else {
            markedContentSequence.append(current_value);
        }
    }

    /**
     * Removes escaping backslashes: each backslash is dropped and the character that
     * follows it is kept as-is, so {@code \(} becomes {@code (} and {@code \\} becomes
     * {@code \}. A backslash at the very end of the text is dropped.
     *
     * @param dict text that may contain backslash escapes
     * @return the text with the escaping backslashes removed
     */
    private static String stripEscapeChars(final String dict) {
        final int length = dict.length();
        final StringBuilder str = new StringBuilder(length);
        int ii = 0;
        while (ii < length) {
            final char c = dict.charAt(ii);
            if (c == '\\') {
                //skip the backslash and copy the escaped character verbatim
                ii++;
                if (ii < length) {
                    str.append(dict.charAt(ii));
                }
            } else {
                str.append(c);
            }
            ii++;
        }
        return str.toString();
    }

    /**
     * @return true once at least one EMC has been processed
     */
    public boolean hasContent() {
        return contentExtracted;
    }
}