// org.jpedal.objects.PdfData (Maven / Gradle / Ivy)
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
@LICENSE@
*
* ---------------
* PdfData.java
* ---------------
*/
package org.jpedal.objects;
import java.util.Arrays;

import org.jpedal.color.GenericColorSpace;
import org.jpedal.utils.Fonts;
/**
 * Holds text data for extraction and manipulation.
 *
 * <p>The PDF routines create 'raw' text data; the grouping routines then attempt
 * to intelligently stitch it together and leave it as 'processed data' in this
 * class.
 *
 * <p>NOTE: only the methods (NOT the public variables) are part of the API.
 */
public class PdfData {

    /**
     * text orientation
     */
    public static final int HORIZONTAL_LEFT_TO_RIGHT = 0;
    public static final int HORIZONTAL_RIGHT_TO_LEFT = 1;
    public static final int VERTICAL_TOP_TO_BOTTOM = 2;
    public static final int VERTICAL_BOTTOM_TO_TOP = 3;

    /**
     * number of slots every array starts with (and returns to on flushTextList())
     */
    private static final int INITIAL_CAPACITY = 2000;

    /**
     * number of raw text elements currently stored; also the index of the next free slot
     */
    private int pointer;

    /**
     * flag to show x co-ord has been embedded in content
     */
    private boolean widthIsEmbedded;

    /**
     * used to hide our encoding values in fragments of data so we can strip out
     */
    public static final String marker = (String.valueOf(((char) (0))));
    //public static final String hiddenMarker = (String.valueOf(((char) (65534))));

    /**
     * current capacity of every array below (must be declared before them, as
     * their initialisers read it)
     */
    protected int max = INITIAL_CAPACITY;

    /**
     * text of each raw element, including any font/color tokens added by
     * addRawTextElement
     */
    public String[] contents = new String[max];

    /**
     * writingMode value passed in for each raw element
     */
    public int[] f_writingMode = new int[max];

    /**
     * current_text_length value passed in for each raw element
     */
    public int[] text_length = new int[max];

    /**
     * character_spacing value passed in for each raw element
     */
    public float[] f_character_spacing = new float[max];

    /**
     * fontSize value passed in for each raw element
     */
    public int[] f_end_font_size = new int[max];

    /**
     * current_space value passed in for each raw element, multiplied by 1000
     */
    public float[] space_width = new float[max];

    /**
     * x1 co-ord passed in for each raw element
     */
    public float[] f_x1 = new float[max];

    /**
     * color tag passed in for each raw element
     */
    public String[] colorTag = new String[max];

    /**
     * x2 co-ord passed in for each raw element
     */
    public float[] f_x2 = new float[max];

    /**
     * y1 co-ord passed in for each raw element
     */
    public float[] f_y1 = new float[max];

    /**
     * y2 co-ord passed in for each raw element
     */
    public float[] f_y2 = new float[max];

    /**
     * when true, addRawTextElement wraps each element in its color tokens
     */
    boolean isColorExtracted;

    /**
     * create empty object to hold content
     */
    public PdfData() {
    }

    /**
     * get number of raw objects on page
     *
     * @return count of elements stored since creation or the last flushTextList()
     */
    public final int getRawTextElementCount() {
        return pointer;
    }

    /**
     * clear store of objects once written out to reclaim memory.
     *
     * Resets the element count to zero and replaces every array with a fresh
     * one of the initial size. The width-embedded and color-extraction flags
     * are left untouched.
     */
    public final void flushTextList() {

        pointer = 0;
        max = INITIAL_CAPACITY;

        contents = new String[max];
        colorTag = new String[max];

        f_writingMode = new int[max];
        text_length = new int[max];
        f_end_font_size = new int[max];

        f_character_spacing = new float[max];
        space_width = new float[max];
        f_x1 = new float[max];
        f_x2 = new float[max];
        f_y1 = new float[max];
        f_y2 = new float[max];
    }

    /**
     * store line of raw text for later processing.
     *
     * Nothing is stored if processed_line is empty. Otherwise processed_line is
     * MODIFIED IN PLACE (tokens are inserted/appended) before being stored.
     *
     * @param character_spacing   stored in f_character_spacing
     * @param writingMode         stored in f_writingMode
     * @param font_as_string      font token put in front of the text when isXMLExtraction is true
     * @param current_space       stored in space_width after multiplying by 1000
     * @param fontSize            stored in f_end_font_size
     * @param x1                  stored in f_x1
     * @param y1                  stored in f_y1
     * @param x2                  stored in f_x2
     * @param y2                  stored in f_y2
     * @param processed_line      text to store (altered by this call if tokens are added)
     * @param current_text_length stored in text_length
     * @param currentColorTag     stored in colorTag; also put in front of the text when
     *                            color extraction is enabled
     * @param isXMLExtraction     if true, wrap the text in font_as_string ... Fonts.fe
     */
    public final void addRawTextElement(final float character_spacing, final int writingMode,
                                        final String font_as_string, final float current_space, final int fontSize,
                                        final float x1, final float y1, final float x2, final float y2,
                                        final StringBuffer processed_line, final int current_text_length, final String currentColorTag, final boolean isXMLExtraction) {

        //ignore empty fragments
        if (processed_line.length() == 0) {
            return;
        }

        //add font tokens
        if (isXMLExtraction) {
            processed_line.insert(0, font_as_string);
            processed_line.append(Fonts.fe);
        }

        //add color tokens (applied second, so they enclose any font tokens)
        if (isColorExtracted) {
            processed_line.insert(0, currentColorTag);
            processed_line.append(GenericColorSpace.ce);
        }

        contents[pointer] = processed_line.toString();
        colorTag[pointer] = currentColorTag;

        f_writingMode[pointer] = writingMode;
        text_length[pointer] = current_text_length;
        f_end_font_size[pointer] = fontSize;

        f_character_spacing[pointer] = character_spacing;
        space_width[pointer] = current_space * 1000;

        f_x1[pointer] = x1;
        f_x2[pointer] = x2;
        f_y1[pointer] = y1;
        f_y2[pointer] = y2;

        pointer++;

        //grow now so there is always a free slot for the next element
        if (pointer == max) {
            resizeArrays();
        }
    }

    /**
     * grow every array, keeping existing values.
     *
     * New capacity: x5 while below 5000, x2 while below 10000, otherwise +1000.
     */
    private void resizeArrays() {

        if (max < 5000) {
            max *= 5;
        } else if (max < 10000) {
            max *= 2;
        } else {
            max += 1000;
        }

        //Arrays.copyOf allocates the larger array and copies the old values across
        contents = Arrays.copyOf(contents, max);
        colorTag = Arrays.copyOf(colorTag, max);

        f_writingMode = Arrays.copyOf(f_writingMode, max);
        text_length = Arrays.copyOf(text_length, max);
        f_end_font_size = Arrays.copyOf(f_end_font_size, max);

        f_character_spacing = Arrays.copyOf(f_character_spacing, max);
        space_width = Arrays.copyOf(space_width, max);
        f_x1 = Arrays.copyOf(f_x1, max);
        f_x2 = Arrays.copyOf(f_x2, max);
        f_y1 = Arrays.copyOf(f_y1, max);
        f_y2 = Arrays.copyOf(f_y2, max);
    }

    /**
     * set flag to show width in text
     */
    public void widthIsEmbedded() {
        widthIsEmbedded = true;
    }

    /**
     * show if width in text
     *
     * @return true once widthIsEmbedded() has been called
     */
    public boolean IsEmbedded() {
        return widthIsEmbedded;
    }

    /**
     * set colour extraction
     */
    public void enableTextColorDataExtraction() {
        isColorExtracted = true;
    }

    /**
     * flag to show if color extracted in xml
     *
     * @return true once enableTextColorDataExtraction() has been called
     */
    public boolean isColorExtracted() {
        return isColorExtracted;
    }

    /**
     * release all arrays so they can be garbage collected.
     *
     * The element count is NOT reset and the arrays are left null, so the
     * object must not be read or added to afterwards unless flushTextList()
     * is called first to recreate the arrays.
     */
    public void dispose() {

        contents = null;
        colorTag = null;

        f_writingMode = null;
        text_length = null;
        f_end_font_size = null;

        f_character_spacing = null;
        space_width = null;
        f_x1 = null;
        f_x2 = null;
        f_y1 = null;
        f_y2 = null;
    }
}