// org.jpedal.objects.PdfData Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of OpenViewerFX Show documentation
// Show all versions of OpenViewerFX Show documentation
// Open Source (LGPL) JavaFX PDF Viewer
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
@LICENSE@
*
* ---------------
* PdfData.java
* ---------------
*/
package org.jpedal.objects;
import org.jpedal.color.GenericColorSpace;
import org.jpedal.utils.Fonts;
/**
 * Holds text data for extraction and manipulation.
 *
 * <p>Pdf routines create 'raw' text data; grouping routines will attempt to
 * intelligently stitch it together and leave it as 'processed data' in this class.
 *
 * <p>Each raw text element occupies one index shared by all of the arrays below
 * (content, writing mode, co-ordinates, font size and so on). The arrays are
 * grown together when they fill up.
 *
 * <p>NOTE: ONLY methods (NOT public variables) are part of the API.
 */
public class PdfData {

    /** text orientation: horizontal, left to right */
    public static final int HORIZONTAL_LEFT_TO_RIGHT = 0;

    /** text orientation: horizontal, right to left */
    public static final int HORIZONTAL_RIGHT_TO_LEFT = 1;

    /** text orientation: vertical, top to bottom */
    public static final int VERTICAL_TOP_TO_BOTTOM = 2;

    /** text orientation: vertical, bottom to top */
    public static final int VERTICAL_BOTTOM_TO_TOP = 3;

    /**
     * used to hide our encoding values in fragments of data so we can strip out
     * (a one-character string holding char 0)
     */
    public static final String marker = String.valueOf((char) 0);

    /** capacity the arrays start with and return to after {@link #flushTextList()} */
    private static final int INITIAL_SIZE = 2000;

    /** index of the next free slot, which is also the number of elements stored */
    private int pointer;

    /** flag to show x co-ord has been embedded in content */
    private boolean widthIsEmbedded;

    /** current capacity of every array below */
    protected int max = INITIAL_SIZE;

    /** hold the raw content (text plus any font/colour tokens wrapped around it) */
    public String[] contents = new String[max];

    /** hold flag on raw content orientation */
    public int[] f_writingMode = new int[max];

    /** hold text length supplied for each raw element */
    public int[] text_length = new int[max];

    /** hold character spacing supplied for each raw element */
    public float[] f_character_spacing = new float[max];

    /** hold font size supplied for each raw element */
    public int[] f_end_font_size = new int[max];

    /** hold space width for each raw element (the supplied value multiplied by 1000) */
    public float[] space_width = new float[max];

    /** hold x1 co-ord of each raw element */
    public float[] f_x1 = new float[max];

    /** hold color tag of each raw element */
    public String[] colorTag = new String[max];

    /** hold x2 co-ord of each raw element */
    public float[] f_x2 = new float[max];

    /** hold y1 co-ord of each raw element */
    public float[] f_y1 = new float[max];

    /** hold y2 co-ord of each raw element */
    public float[] f_y2 = new float[max];

    /** true once colour tokens should be wrapped around stored content */
    boolean isColorExtracted;

    /**
     * create empty object to hold content
     */
    public PdfData() {
    }

    /**
     * get number of raw objects on page
     *
     * @return number of elements added since creation or the last flush/dispose
     */
    public final int getRawTextElementCount() {
        return pointer;
    }

    /**
     * clear store of objects once written out to reclaim memory.
     * All arrays are replaced with empty ones of the initial size and the
     * element count goes back to zero.
     */
    public final void flushTextList() {
        pointer = 0;
        max = INITIAL_SIZE;

        contents = new String[max];
        colorTag = new String[max];

        f_writingMode = new int[max];
        text_length = new int[max];
        f_end_font_size = new int[max];

        f_character_spacing = new float[max];
        space_width = new float[max];
        f_x1 = new float[max];
        f_x2 = new float[max];
        f_y1 = new float[max];
        f_y2 = new float[max];
    }

    /**
     * store line of raw text for later processing. Nothing is stored when
     * {@code processed_line} is empty.
     *
     * <p>{@code processed_line} is modified in place when tokens are added.
     *
     * @param character_spacing   character spacing to record
     * @param writingMode         orientation to record (see the orientation constants)
     * @param font_as_string      font token put in front of the text for XML extraction
     * @param current_space       space width; stored multiplied by 1000
     * @param fontSize            font size to record
     * @param x1                  x1 co-ord to record
     * @param y1                  y1 co-ord to record
     * @param x2                  x2 co-ord to record
     * @param y2                  y2 co-ord to record
     * @param processed_line      the text itself
     * @param current_text_length text length to record
     * @param currentColorTag     colour token recorded and, when colour extraction
     *                            is enabled, put in front of the text
     * @param isXMLExtraction     true to wrap the text in font tokens
     */
    public final void addRawTextElement(final float character_spacing, final int writingMode,
                                        final String font_as_string, final float current_space, final int fontSize,
                                        final float x1, final float y1, final float x2, final float y2,
                                        final StringBuffer processed_line, final int current_text_length,
                                        final String currentColorTag, final boolean isXMLExtraction) {

        if (processed_line.length() == 0) {
            return;
        }

        //add font tokens first so that any colour tokens end up outermost
        if (isXMLExtraction) {
            processed_line.insert(0, font_as_string);
            processed_line.append(Fonts.fe);
        }

        //add color token
        // NOTE(review): StringBuffer.insert writes the literal text "null" for a null
        // String - confirm callers never pass a null tag when extraction is enabled.
        if (isColorExtracted) {
            processed_line.insert(0, currentColorTag);
            processed_line.append(GenericColorSpace.ce);
        }

        final int slot = pointer;

        contents[slot] = processed_line.toString();
        colorTag[slot] = currentColorTag;

        f_writingMode[slot] = writingMode;
        text_length[slot] = current_text_length;
        f_end_font_size[slot] = fontSize;

        f_character_spacing[slot] = character_spacing;
        space_width[slot] = current_space * 1000;
        f_x1[slot] = x1;
        f_x2[slot] = x2;
        f_y1[slot] = y1;
        f_y2[slot] = y2;

        pointer++;

        //grow as soon as the last slot is used so the next add always has room
        if (pointer == max) {
            resizeArrays();
        }
    }

    /**
     * grow every array to the next capacity, keeping the elements stored so far
     */
    private void resizeArrays() {

        max = nextCapacity(max);

        contents = grow(contents);
        colorTag = grow(colorTag);

        f_writingMode = grow(f_writingMode);
        text_length = grow(text_length);
        f_end_font_size = grow(f_end_font_size);

        f_character_spacing = grow(f_character_spacing);
        space_width = grow(space_width);
        f_x1 = grow(f_x1);
        f_x2 = grow(f_x2);
        f_y1 = grow(f_y1);
        f_y2 = grow(f_y2);
    }

    /**
     * work out capacity to grow to: x5 below 5000, x2 below 10000, otherwise +1000
     */
    private static int nextCapacity(final int current) {
        if (current < 5000) {
            return current * 5;
        } else if (current < 10000) {
            return current * 2;
        } else {
            return current + 1000;
        }
    }

    /**
     * copy the used part of a String array into a new array of size max
     */
    private String[] grow(final String[] source) {
        final String[] target = new String[max];
        System.arraycopy(source, 0, target, 0, pointer);
        return target;
    }

    /**
     * copy the used part of an int array into a new array of size max
     */
    private int[] grow(final int[] source) {
        final int[] target = new int[max];
        System.arraycopy(source, 0, target, 0, pointer);
        return target;
    }

    /**
     * copy the used part of a float array into a new array of size max
     */
    private float[] grow(final float[] source) {
        final float[] target = new float[max];
        System.arraycopy(source, 0, target, 0, pointer);
        return target;
    }

    /**
     * set flag to show width in text
     */
    public void widthIsEmbedded() {
        widthIsEmbedded = true;
    }

    /**
     * show if width in text
     *
     * @return true once {@link #widthIsEmbedded()} has been called
     */
    public boolean IsEmbedded() {
        return widthIsEmbedded;
    }

    /**
     * set colour extraction
     */
    public void enableTextColorDataExtraction() {
        isColorExtracted = true;
    }

    /**
     * flag to show if color extracted in xml
     *
     * @return true once {@link #enableTextColorDataExtraction()} has been called
     */
    public boolean isColorExtracted() {
        return isColorExtracted;
    }

    /**
     * release all stored data. The arrays are set to null, so the element count
     * is reset as well; call {@link #flushTextList()} before adding elements again.
     */
    public void dispose() {

        //keep the reported count in step with the (now absent) storage
        pointer = 0;

        contents = null;
        colorTag = null;

        f_writingMode = null;
        text_length = null;
        f_end_font_size = null;

        f_character_spacing = null;
        space_width = null;
        f_x1 = null;
        f_x2 = null;
        f_y1 = null;
        f_y2 = null;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy