All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpedal.objects.PdfData Maven / Gradle / Ivy

There is a newer version: 7.15.25
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * PdfData.java
 * ---------------
 */
package org.jpedal.objects;

import org.jpedal.color.GenericColorSpace;
import org.jpedal.utils.Fonts;


/**
 * 

* holds text data for extraction and manipulation *

*

* Pdf routines create 'raw' text data *

*

* grouping routines will attempt to intelligently stitch together and leave as * 'processed data' in this class *

*

* NOTE ONLY methods (NOT public variables) are part of API *

*/ public class PdfData { /** * test orientation */ public static final int HORIZONTAL_LEFT_TO_RIGHT = 0; public static final int HORIZONTAL_RIGHT_TO_LEFT = 1; public static final int VERTICAL_TOP_TO_BOTTOM = 2; public static final int VERTICAL_BOTTOM_TO_TOP = 3; private int pointer; /** * flag to show x co-ord has been embedded in content */ private boolean widthIsEmbedded; /** * used to hide our encoding values in fragments of data so we can strip out */ public static final String marker = (String.valueOf(((char) (0)))); //public static final String hiddenMarker = (String.valueOf(((char) (65534)))); /** * initial array size */ protected int max = 2000; /** * hold the raw content */ public String[] contents = new String[max]; /** * hold flag on raw content orientation */ public int[] f_writingMode = new int[max]; /** * hold raw content */ public int[] text_length = new int[max]; /** * hold raw content */ public float[] f_character_spacing = new float[max]; /** * hold raw content */ public int[] f_end_font_size = new int[max]; /** * hold raw content */ public float[] space_width = new float[max]; /** * hold raw content */ public float[] f_x1 = new float[max]; /** * hold color content */ public String[] colorTag = new String[max]; /** * hold raw content */ public float[] f_x2 = new float[max]; /** * hold raw content */ public float[] f_y1 = new float[max]; /** * hold raw content */ public float[] f_y2 = new float[max]; boolean isColorExtracted; /** * create empty object to hold content */ public PdfData() { } /** * get number of raw objects on page */ public final int getRawTextElementCount() { return pointer; } /** * clear store of objects once written out * to reclaim memory. If flag set, sets data to * state after page decoded before grouping for reparse */ public final void flushTextList() { pointer = 0; max = 2000; contents = new String[max]; f_writingMode = new int[max]; text_length = new int[max]; f_character_spacing = new float[max]; f_end_font_size = new int[max]; space_width = new float[max]; f_x1 = new float[max]; f_x2 = new float[max]; f_y1 = new float[max]; f_y2 = new float[max]; colorTag = new String[max]; } /** * store line of raw text for later processing */ public final void addRawTextElement(final float character_spacing, final int writingMode, final String font_as_string, final float current_space, final int fontSize, final float x1, final float y1, final float x2, final float y2, final StringBuffer processed_line, final int current_text_length, final String currentColorTag, final boolean isXMLExtraction) { if (processed_line.length() > 0) { //add tokens if (isXMLExtraction) { processed_line.insert(0, font_as_string); processed_line.append(Fonts.fe); } //add color token if (isColorExtracted) { processed_line.insert(0, currentColorTag); processed_line.append(GenericColorSpace.ce); } f_writingMode[pointer] = writingMode; text_length[pointer] = current_text_length; f_character_spacing[pointer] = character_spacing; f_x1[pointer] = x1; colorTag[pointer] = currentColorTag; f_x2[pointer] = x2; f_y1[pointer] = y1; f_y2[pointer] = y2; contents[pointer] = processed_line.toString(); f_end_font_size[pointer] = fontSize; space_width[pointer] = current_space * 1000; pointer++; //resize pointers if (pointer == max) { resizeArrays(); } } } /** * resize arrays to add newItems to end (-1 makes it grow) */ private void resizeArrays() { float[] temp_f; int[] temp_i; String[] temp_s; // if(newItems<0){ // max=-newItems; // pointer=max; // }else if(newItems==0){ if (max < 5000) { max *= 5; } else if (max < 10000) { max *= 2; } else { max += 1000; } // } // else{ // max=contents.length+newItems-1; // pointer=contents.length; // } temp_s = contents; contents = new String[max]; System.arraycopy(temp_s, 0, contents, 0, pointer); temp_i = f_writingMode; f_writingMode = new int[max]; f_writingMode = new int[max]; System.arraycopy(temp_i, 0, f_writingMode, 0, pointer); temp_s = colorTag; colorTag = new String[max]; System.arraycopy(temp_s, 0, colorTag, 0, pointer); temp_i = text_length; text_length = new int[max]; System.arraycopy(temp_i, 0, text_length, 0, pointer); temp_f = f_character_spacing; f_character_spacing = new float[max]; System.arraycopy(temp_f, 0, f_character_spacing, 0, pointer); temp_i = f_end_font_size; f_end_font_size = new int[max]; System.arraycopy(temp_i, 0, f_end_font_size, 0, pointer); temp_f = space_width; space_width = new float[max]; System.arraycopy(temp_f, 0, space_width, 0, pointer); temp_f = f_x1; f_x1 = new float[max]; System.arraycopy(temp_f, 0, f_x1, 0, pointer); temp_f = f_x2; f_x2 = new float[max]; System.arraycopy(temp_f, 0, f_x2, 0, pointer); temp_f = f_y1; f_y1 = new float[max]; System.arraycopy(temp_f, 0, f_y1, 0, pointer); temp_f = f_y2; f_y2 = new float[max]; System.arraycopy(temp_f, 0, f_y2, 0, pointer); } /** * set flag to show width in text */ public void widthIsEmbedded() { widthIsEmbedded = true; } /** * show if width in text */ public boolean IsEmbedded() { return widthIsEmbedded; } /** * set colour extraction */ public void enableTextColorDataExtraction() { isColorExtracted = true; } /** * flag to show if color extracted in xml */ public boolean isColorExtracted() { return isColorExtracted; } public void dispose() { contents = null; f_writingMode = null; text_length = null; f_character_spacing = null; f_end_font_size = null; space_width = null; f_x1 = null; colorTag = null; f_x2 = null; f_y1 = null; f_y2 = null; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy