org.lockss.util.PdfUtil Maven / Gradle / Ivy
Show all versions of lockss-core Show documentation
/*
Copyright (c) 2000-2019 Board of Trustees of Leland Stanford Jr. University,
all rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
STANFORD UNIVERSITY BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Except as contained in this notice, the name of Stanford University shall not
be used in advertising or otherwise to promote the sale, use or other dealings
in this Software without prior written authorization from Stanford University.
*/
package org.lockss.util;
import java.io.*;
import java.util.*;
import org.apache.commons.collections4.iterators.*;
import org.lockss.config.*;
import org.lockss.filter.pdf.*;
import org.lockss.plugin.*;
import org.lockss.plugin.definable.DefinableArchivalUnit;
import org.lockss.util.CloseCallbackInputStream.DeleteFileOnCloseInputStream;
import org.lockss.util.io.DeferredTempFileOutputStream;
import org.pdfbox.cos.*;
import org.pdfbox.util.PDFOperator;
/**
* Utilities for PDF processing and filtering.
* @author Thib Guicherd-Callin
*/
@Deprecated
public class PdfUtil {
/**
* Name of the configuration parameter holding the temp stream threshold:
* filtered PDF files smaller than this will be kept in memory, larger
* than this will be written to a temp file.
*/
static final String PARAM_TEMP_STREAM_THRESHOLD =
Configuration.PREFIX + "pdfutil.tempStreamThreshold";
/**
* Default value used when {@link #PARAM_TEMP_STREAM_THRESHOLD} is not
* configured: 1024 * 1024.
*/
static final int DEFAULT_TEMP_STREAM_THRESHOLD = 1024 * 1024;
/**
* An interface for looping policies.
* <p>This interface is intended for the following types of loops:</p>
* <pre>
* boolean success = resultPolicy.initialValue();
* while (...) {
*   boolean oneStep = doSomething(...);
*   success = resultPolicy.updateResult(success, oneStep);
*   if (!resultPolicy.shouldKeepGoing(success)) {
*     break;
*   }
* }
* return success;
* </pre>
* <p>For instance, the above loop could have short-circuiting "or"
* semantics: it returns true as soon as any of the steps returns
* true, and if none return true it returns false. This would be
* achieved with {@link ResultPolicy#initialValue} returning false,
* {@link ResultPolicy#updateResult}<code>(success, oneStep)</code>
* returning <code>success || oneStep</code>, and
* {@link ResultPolicy#shouldKeepGoing}<code>(success)</code>
* returning <code>!success</code>.</p>
* <p>To give it non short-circuiting semantics, just make
* {@link ResultPolicy#shouldKeepGoing} return true constantly.
* Likewise, the loop can have "and" semantics with or without
* short-circuiting, for appropriate values of the three
* methods.</p>
* <p>Examples of how to use these result policies can be found
* for instance in {@link AggregateDocumentTransform#transform},
* {@link AggregatePageTransform#transform} or
* {@link TransformSelectedPages#transform}.</p>
* @author Thib Guicherd-Callin
* @see PdfUtil#AND
* @see PdfUtil#AND_ALL
* @see PdfUtil#OR
* @see PdfUtil#OR_ALL
*/
public interface ResultPolicy {
/**
* Provides the initial value for the success flag.
* @return The value of the success flag before the loop.
*/
boolean initialValue();
/**
* Determines whether the loop should continue based on the
* current value of the success flag (passed as argument).
* @param currentResult The current value of the success flag.
* @return Whether the loop should continue based on the current
* value of the success flag.
*/
boolean shouldKeepGoing(boolean currentResult);
/**
* Computes the new value of the success flag, given the
* current value of the success flag and a new result from an
* iteration of the loop.
* @param currentResult The current value of the success flag.
* @param update A new result from an iteration of the loop.
* @return The new value of the success flag.
*/
boolean updateResult(boolean currentResult, boolean update);
}
/**
 * The short-circuiting "and" {@link ResultPolicy}: the flag starts out
 * true, every step is and-ed into it, and the loop is asked to stop as
 * soon as the flag becomes false.
 * @see PdfUtil#AND_ALL
 */
public static final ResultPolicy AND = new ResultPolicy() {
  /** Starts out true. */
  @Override
  public boolean initialValue() {
    return true;
  }

  /** Folds the new step into the flag with a logical "and". */
  @Override
  public boolean updateResult(boolean currentResult, boolean update) {
    return currentResult && update;
  }

  /** Keeps looping only while the flag is still true. */
  @Override
  public boolean shouldKeepGoing(boolean currentResult) {
    return currentResult;
  }

  @Override
  public String toString() {
    return "AND";
  }
};
/**
 * The non short-circuiting "and" {@link ResultPolicy}: the flag starts
 * out true, every step is and-ed into it, and the loop is never asked
 * to stop early.
 * @see PdfUtil#AND
 */
public static final ResultPolicy AND_ALL = new ResultPolicy() {
  /** Starts out true. */
  @Override
  public boolean initialValue() {
    return true;
  }

  /** Folds the new step into the flag with a logical "and". */
  @Override
  public boolean updateResult(boolean currentResult, boolean update) {
    return currentResult && update;
  }

  /** Always keeps looping, whatever the flag is. */
  @Override
  public boolean shouldKeepGoing(boolean currentResult) {
    return true;
  }

  @Override
  public String toString() {
    return "AND_ALL";
  }
};
/**
 * The short-circuiting "or" {@link ResultPolicy}: the flag starts out
 * false, every step is or-ed into it, and the loop is asked to stop as
 * soon as the flag becomes true.
 * @see PdfUtil#OR_ALL
 */
public static final ResultPolicy OR = new ResultPolicy() {
  /** Starts out false. */
  @Override
  public boolean initialValue() {
    return false;
  }

  /** Folds the new step into the flag with a logical "or". */
  @Override
  public boolean updateResult(boolean currentResult, boolean update) {
    return currentResult || update;
  }

  /** Keeps looping only while the flag is still false. */
  @Override
  public boolean shouldKeepGoing(boolean currentResult) {
    return !currentResult;
  }

  @Override
  public String toString() {
    return "OR";
  }
};
/**
 * The non short-circuiting "or" {@link ResultPolicy}: the flag starts
 * out false, every step is or-ed into it, and the loop is never asked
 * to stop early.
 * @see PdfUtil#OR
 */
public static final ResultPolicy OR_ALL = new ResultPolicy() {
  /** Starts out false. */
  @Override
  public boolean initialValue() {
    return false;
  }

  /** Folds the new step into the flag with a logical "or". */
  @Override
  public boolean updateResult(boolean currentResult, boolean update) {
    return currentResult || update;
  }

  /** Always keeps looping, whatever the flag is. */
  @Override
  public boolean shouldKeepGoing(boolean currentResult) {
    return true;
  }

  @Override
  public String toString() {
    return "OR_ALL";
  }
};
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String APPEND_CURVED_SEGMENT = "c";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String APPEND_CURVED_SEGMENT_FINAL = "y";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String APPEND_CURVED_SEGMENT_INITIAL = "v";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String APPEND_RECTANGLE = "re";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String APPEND_STRAIGHT_LINE_SEGMENT = "l";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String BEGIN_COMPATIBILITY_SECTION = "BX";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String BEGIN_INLINE_IMAGE_DATA = "ID";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String BEGIN_INLINE_IMAGE_OBJECT = "BI";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String BEGIN_MARKED_CONTENT = "BMC";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String BEGIN_MARKED_CONTENT_PROP = "BDC";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String BEGIN_NEW_SUBPATH = "m";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String BEGIN_TEXT_OBJECT = "BT";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String CLOSE_FILL_STROKE_EVENODD = "b*";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String CLOSE_FILL_STROKE_NONZERO = "b";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String CLOSE_STROKE = "s";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String CLOSE_SUBPATH = "h";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String CONCATENATE_MATRIX = "cm";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String DEFINE_MARKED_CONTENT_POINT = "MP";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String DEFINE_MARKED_CONTENT_POINT_PROP = "DP";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String END_COMPATIBILITY_SECTION = "EX";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String END_INLINE_IMAGE_OBJECT = "EI";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String END_MARKED_CONTENT = "EMC";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String END_PATH = "n";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String END_TEXT_OBJECT = "ET";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String FILL_EVENODD = "f*";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String FILL_NONZERO = "f";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String FILL_NONZERO_OBSOLETE = "F";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String FILL_STROKE_EVENODD = "B*";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String FILL_STROKE_NONZERO = "B";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String INVOKE_NAMED_XOBJECT = "Do";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String MOVE_TEXT_POSITION = "Td";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String MOVE_TEXT_POSITION_SET_LEADING = "TD";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String MOVE_TO_NEXT_LINE = "T*";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String MOVE_TO_NEXT_LINE_SHOW_TEXT = "\'";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String PAINT_SHADING_PATTERN = "sh";
/**
* A suggested prefix for non-definitional parameters conveying
* hints about PDF filter factories.
* @see DefinableArchivalUnit#SUFFIX_FILTER_FACTORY
* @deprecated Use {@link org.lockss.pdf.PdfUtil#PREFIX_PDF_FILTER_FACTORY_HINT}
*/
@Deprecated
public static final String PREFIX_PDF_FILTER_FACTORY_HINT = org.lockss.pdf.PdfUtil.PREFIX_PDF_FILTER_FACTORY_HINT;
/**
* The PDF MIME type, <code>{@value}</code>.
* @see <a href="https://www.rfc-editor.org/rfc/rfc3778">RFC 3778</a>
* @deprecated Use {@link org.lockss.util.Constants#MIME_TYPE_PDF}
*/
@Deprecated
public static final String PDF_MIME_TYPE = "application/pdf";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String RESTORE_GRAPHICS_STATE = "Q";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SAVE_GRAPHICS_STATE = "q";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_CHARACTER_SPACING = "Tc";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_CLIPPING_PATH_EVENODD = "W*";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_CLIPPING_PATH_NONZERO = "W";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_CMYK_COLOR_NONSTROKING = "k";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_CMYK_COLOR_STROKING = "K";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_COLOR_NONSTROKING = "sc";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_COLOR_NONSTROKING_SPECIAL = "scn";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_COLOR_RENDERING_INTENT = "ri";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_COLOR_SPACE_NONSTROKING = "cs";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_COLOR_SPACE_STROKING = "CS";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_COLOR_STROKING = "SC";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_COLOR_STROKING_SPECIAL = "SCN";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_FLATNESS_TOLERANCE = "i";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_FROM_GRAPHICS_STATE = "gs";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_GLYPH_WIDTH = "d0";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_GLYPH_WIDTH_BOUNDING_BOX = "d1";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_GRAY_LEVEL_NONSTROKING = "g";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_GRAY_LEVEL_STROKING = "G";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_HORIZONTAL_TEXT_SCALING = "Tz";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_LINE_CAP_STYLE = "J";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_LINE_DASH_PATTERN = "d";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_LINE_JOIN_STYLE = "j";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_LINE_WIDTH = "w";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_MITER_LIMIT = "M";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_RGB_COLOR_NONSTROKING = "rg";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_RGB_COLOR_STROKING = "RG";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_SPACING_MOVE_TO_NEXT_LINE_SHOW_TEXT = "\"";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_TEXT_FONT_AND_SIZE = "Tf";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_TEXT_LEADING = "TL";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_TEXT_MATRIX = "Tm";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_TEXT_RENDERING_MODE = "Tr";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_TEXT_RISE = "Ts";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SET_WORD_SPACING = "Tw";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SHOW_TEXT = "Tj";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String SHOW_TEXT_GLYPH_POSITIONING = "TJ";
/**
* The PDF <code>{@value}</code> operator string.
*/
public static final String STROKE = "S";
/**
* All 73 operators defined by PDF 1.6, in the order they are
* listed in the specification (Appendix A).
* <p>Callers outside this class hierarchy obtain these values through
* {@link #getPdf16Operators()}, which wraps this array in an
* unmodifiable iterator.</p>
* @see "PDF Reference, Fifth Edition, Version 1.6"
*/
protected static final String[] PDF_1_6_OPERATORS = {
CLOSE_FILL_STROKE_NONZERO,
FILL_STROKE_NONZERO,
CLOSE_FILL_STROKE_EVENODD,
FILL_STROKE_EVENODD,
BEGIN_MARKED_CONTENT_PROP,
BEGIN_INLINE_IMAGE_OBJECT,
BEGIN_MARKED_CONTENT,
BEGIN_TEXT_OBJECT,
BEGIN_COMPATIBILITY_SECTION,
APPEND_CURVED_SEGMENT,
CONCATENATE_MATRIX,
SET_COLOR_SPACE_STROKING,
SET_COLOR_SPACE_NONSTROKING,
SET_LINE_DASH_PATTERN,
SET_GLYPH_WIDTH,
SET_GLYPH_WIDTH_BOUNDING_BOX,
INVOKE_NAMED_XOBJECT,
DEFINE_MARKED_CONTENT_POINT_PROP,
END_INLINE_IMAGE_OBJECT,
END_MARKED_CONTENT,
END_TEXT_OBJECT,
END_COMPATIBILITY_SECTION,
FILL_NONZERO,
FILL_NONZERO_OBSOLETE,
FILL_EVENODD,
SET_GRAY_LEVEL_STROKING,
SET_GRAY_LEVEL_NONSTROKING,
SET_FROM_GRAPHICS_STATE,
CLOSE_SUBPATH,
SET_FLATNESS_TOLERANCE,
BEGIN_INLINE_IMAGE_DATA,
SET_LINE_JOIN_STYLE,
SET_LINE_CAP_STYLE,
SET_CMYK_COLOR_STROKING,
SET_CMYK_COLOR_NONSTROKING,
APPEND_STRAIGHT_LINE_SEGMENT,
BEGIN_NEW_SUBPATH,
SET_MITER_LIMIT,
DEFINE_MARKED_CONTENT_POINT,
END_PATH,
SAVE_GRAPHICS_STATE,
RESTORE_GRAPHICS_STATE,
APPEND_RECTANGLE,
SET_RGB_COLOR_STROKING,
SET_RGB_COLOR_NONSTROKING,
SET_COLOR_RENDERING_INTENT,
CLOSE_STROKE,
STROKE,
SET_COLOR_STROKING,
SET_COLOR_NONSTROKING,
SET_COLOR_STROKING_SPECIAL,
SET_COLOR_NONSTROKING_SPECIAL,
PAINT_SHADING_PATTERN,
MOVE_TO_NEXT_LINE,
SET_CHARACTER_SPACING,
MOVE_TEXT_POSITION,
MOVE_TEXT_POSITION_SET_LEADING,
SET_TEXT_FONT_AND_SIZE,
SHOW_TEXT,
SHOW_TEXT_GLYPH_POSITIONING,
SET_TEXT_LEADING,
SET_TEXT_MATRIX,
SET_TEXT_RENDERING_MODE,
SET_TEXT_RISE,
SET_WORD_SPACING,
SET_HORIZONTAL_TEXT_SCALING,
APPEND_CURVED_SEGMENT_INITIAL,
SET_LINE_WIDTH,
SET_CLIPPING_PATH_NONZERO,
SET_CLIPPING_PATH_EVENODD,
APPEND_CURVED_SEGMENT_FINAL,
MOVE_TO_NEXT_LINE_SHOW_TEXT,
SET_SPACING_MOVE_TO_NEXT_LINE_SHOW_TEXT,
};
/**
* A logger for use by this class; the static helpers of this class
* report debug-level progress and errors through it.
*/
private static Logger logger = Logger.getLogger();
/**
 * Runs the given transform on the given PDF document, then saves the
 * document into the given output stream.
 * <p>The document is saved whatever value the transform returns. An
 * {@link OutOfMemoryError} is logged and rethrown; an
 * {@link IOException} is logged and reported as a false result.</p>
 * @param documentTransform A PDF transform.
 * @param pdfDocument       A PDF document.
 * @param outputStream      An output stream into which to write the
 *                          transformed PDF document.
 * @return The value returned by the transform, or false if an
 *         {@link IOException} occurred while transforming or saving.
 */
public static boolean applyAndSave(DocumentTransform documentTransform,
                                   PdfDocument pdfDocument,
                                   OutputStream outputStream) {
  boolean transformResult;
  try {
    transformResult = documentTransform.transform(pdfDocument);
    logger.debug2("Document transform result: " + transformResult);
    pdfDocument.save(outputStream);
  }
  catch (IOException exc) {
    logger.error("Document transform failed", exc);
    return false;
  }
  catch (OutOfMemoryError err) {
    // Log for diagnosis, but never swallow an out-of-memory condition
    logger.error("Out of memory in the PDF framework", err);
    throw err;
  }
  return transformResult;
}
/**
 * Parses a PDF document from the given input stream, applies the given
 * output document transform to it, and returns the result as a new
 * input stream.
 * <p>The output is buffered in a {@link DeferredTempFileOutputStream}
 * whose threshold is read from {@link #PARAM_TEMP_STREAM_THRESHOLD}
 * (default {@link #DEFAULT_TEMP_STREAM_THRESHOLD}). If the transform
 * reports failure, its output is discarded and the parsed document is
 * saved as is instead. The parsed document is always closed before
 * returning.</p>
 * @param documentTransform An output document transform.
 * @param inputStream       An input stream from which to parse the PDF
 *                          document.
 * @return An input stream over the resulting PDF bytes (obtained from
 *         {@link DeferredTempFileOutputStream#getDeleteOnCloseInputStream}),
 *         or null if any {@link Exception} was thrown along the way.
 */
public static InputStream applyFromInputStream(OutputDocumentTransform documentTransform,
                                               InputStream inputStream) {
  PdfDocument parsedDocument = null;
  DeferredTempFileOutputStream sink = null;
  final int threshold =
      CurrentConfig.getCurrentConfig().getInt(PARAM_TEMP_STREAM_THRESHOLD,
                                              DEFAULT_TEMP_STREAM_THRESHOLD);
  try {
    parsedDocument = new PdfDocument(inputStream);
    sink = new DeferredTempFileOutputStream(threshold);

    final boolean transformed = documentTransform.transform(parsedDocument, sink);
    if (!transformed) {
      // Throw away whatever the transform wrote and fall back to the
      // untransformed document, written into a fresh stream
      deleteTempFile(sink);
      logger.debug2("Transform from input stream did not succeed; using PDF document as is");
      sink = new DeferredTempFileOutputStream(threshold);
      parsedDocument.save(sink);
      sink.close();
    }
    else {
      sink.close();
      logger.debug2("Transform from input stream succeeded");
    }

    return sink.getDeleteOnCloseInputStream();
  }
  catch (OutOfMemoryError err) {
    // Log for diagnosis, but never swallow an out-of-memory condition
    logger.error("Out of memory in the PDF framework", err);
    throw err;
  }
  catch (Exception exc) {
    logger.error("Transform from input stream failed", exc);
    deleteTempFile(sink);
    return null;
  }
  finally {
    PdfDocument.close(parsedDocument);
  }
}
/**
 * Null-safe convenience call to
 * {@link DeferredTempFileOutputStream#deleteTempFile}.
 * @param dtfos A deferred temp file output stream, or null (in which
 *              case nothing happens).
 */
private static void deleteTempFile(DeferredTempFileOutputStream dtfos) {
  if (dtfos == null) {
    return;
  }
  dtfos.deleteTempFile();
}
/**
 * Looks up the PDF filter factory hint among the title attributes of
 * the given archival unit and, if there is one, instantiates the named
 * class as an {@link OutputDocumentTransform} through the AU's plugin.
 * @param au An archival unit.
 * @return A new output document transform, or null if the AU has no
 *         hint or if the hinted class could not be loaded and
 *         instantiated (the failure is logged).
 */
public static OutputDocumentTransform getOutputDocumentTransform(ArchivalUnit au) {
  final String hintKey = PREFIX_PDF_FILTER_FACTORY_HINT
      + Constants.MIME_TYPE_PDF
      + DefinableArchivalUnit.SUFFIX_HASH_FILTER_FACTORY;
  final String transformClassName = AuUtil.getTitleAttribute(au, hintKey);
  if (transformClassName == null) {
    logger.debug2("No PDF filter factory hint");
    return null;
  }

  OutputDocumentTransform transform;
  try {
    Object instance =
        au.getPlugin().newAuxClass(transformClassName, OutputDocumentTransform.class);
    transform = (OutputDocumentTransform) instance;
    logger.debug2("Successfully loaded and instantiated " + transform.getClass().getName());
  }
  catch (org.lockss.daemon.PluginException.InvalidDefinition invalid) {
    logger.error("Can't load PDF transform", invalid);
    return null;
  }
  catch (RuntimeException exc) {
    logger.error("Can't load PDF transform", exc);
    return null;
  }
  return transform;
}
/**
 * Provides an iterator over the operator strings in
 * {@link #PDF_1_6_OPERATORS}, in array order, wrapped so that it is
 * unmodifiable.
 * @return An unmodifiable iterator of operator strings.
 */
public static Iterator getPdf16Operators() {
  ObjectArrayIterator operators = new ObjectArrayIterator(PDF_1_6_OPERATORS);
  return UnmodifiableIterator.unmodifiableIterator(operators);
}
/**
 * Reads the float value of the PDF float found at the given index
 * in the given token list.
 * <p>Precondition: <code>isPdfFloat(tokens, index)</code>. No bounds
 * or type check is performed here.</p>
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @return The float associated with the selected PDF float.
 * @see #isPdfFloat(List, int)
 * @see #getPdfFloat(Object)
 */
public static float getPdfFloat(List tokens,
                                int index) {
  Object selectedToken = tokens.get(index);
  return getPdfFloat(selectedToken);
}
/**
 * Reads the float value of a token that is a PDF float.
 * <p>Precondition: <code>isPdfFloat(pdfFloat)</code>. The token is
 * cast to {@link COSFloat} without checking.</p>
 * @param pdfFloat A PDF float.
 * @return The float associated with this PDF float.
 * @see COSFloat#floatValue
 * @see #isPdfFloat(Object)
 */
public static float getPdfFloat(Object pdfFloat) {
  COSFloat cosFloat = (COSFloat) pdfFloat;
  return cosFloat.floatValue();
}
/**
 * Reads the integer value of the PDF integer found at the given index
 * in the given token list.
 * <p>Precondition: <code>isPdfInteger(tokens, index)</code>. No bounds
 * or type check is performed here.</p>
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @return The integer associated with the selected PDF integer.
 * @see #isPdfInteger(List, int)
 * @see #getPdfInteger(Object)
 */
public static int getPdfInteger(List tokens,
                                int index) {
  Object selectedToken = tokens.get(index);
  return getPdfInteger(selectedToken);
}
/**
 * Reads the integer value of a token that is a PDF integer.
 * <p>Precondition: <code>isPdfInteger(pdfInteger)</code>. The token is
 * cast to {@link COSInteger} without checking.</p>
 * @param pdfInteger A PDF integer.
 * @return The integer associated with this PDF integer.
 * @see COSInteger#intValue
 * @see #isPdfInteger(Object)
 */
public static int getPdfInteger(Object pdfInteger) {
  COSInteger cosInteger = (COSInteger) pdfInteger;
  return cosInteger.intValue();
}
/**
 * Reads, as a float, the value of the PDF number found at the given
 * index in the given token list.
 * <p>Precondition: <code>isPdfNumber(tokens, index)</code>. No bounds
 * or type check is performed here.</p>
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @return The number (as a float) associated with the selected PDF number.
 * @see #isPdfNumber(List, int)
 * @see #getPdfNumber(Object)
 */
public static float getPdfNumber(List tokens,
                                 int index) {
  Object selectedToken = tokens.get(index);
  return getPdfNumber(selectedToken);
}
/**
 * Reads, as a float, the value of a token that is a PDF number
 * (either a PDF float or a PDF integer).
 * <p>Precondition: <code>isPdfNumber(pdfNumber)</code>. Any token that
 * is not a PDF float is handled as a PDF integer without further
 * checking.</p>
 * @param pdfNumber A PDF number.
 * @return The number (as a float) associated with this PDF number.
 * @see #isPdfNumber(Object)
 * @see #getPdfFloat(Object)
 * @see #getPdfInteger(Object)
 */
public static float getPdfNumber(Object pdfNumber) {
  return isPdfFloat(pdfNumber)
      ? getPdfFloat(pdfNumber)
      : (float) getPdfInteger(pdfNumber); // by precondition, a PDF integer
}
/**
 * Provides an iterator over the PDF operator strings known to this
 * class; currently the same as {@link #getPdf16Operators()}.
 * @return An iterator of operator strings.
 */
public static Iterator getPdfOperators() {
  Iterator pdf16Operators = getPdf16Operators();
  return pdf16Operators;
}
/**
 * Reads the string value of the PDF string found at the given index
 * in the given token list.
 * <p>Precondition: <code>isPdfString(tokens, index)</code>. No bounds
 * or type check is performed here.</p>
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @return The {@link String} associated with the selected PDF string.
 * @see #isPdfString(List, int)
 * @see #getPdfString(Object)
 */
public static String getPdfString(List tokens,
                                  int index) {
  Object selectedToken = tokens.get(index);
  return getPdfString(selectedToken);
}
/**
 * Reads the string value of a token that is a PDF string.
 * <p>Precondition: <code>isPdfString(pdfString)</code>. The token is
 * cast to {@link COSString} without checking.</p>
 * @param pdfString A PDF string.
 * @return The {@link String} associated with this PDF string.
 * @see COSString#getString
 * @see #isPdfString(Object)
 */
public static String getPdfString(Object pdfString) {
  COSString cosString = (COSString) pdfString;
  return cosString.getString();
}
/**
 * Checks whether the token at the given index is
 * {@link #BEGIN_TEXT_OBJECT}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate token.
 * @return True if the index is within the list and the selected token
 *         is the expected operator, false otherwise.
 * @see #isBeginTextObject(Object)
 */
public static boolean isBeginTextObject(List tokens,
                                        int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to match
  }
  return isBeginTextObject(tokens.get(index));
}
/**
 * Checks whether the given token is {@link #BEGIN_TEXT_OBJECT}, by
 * delegating to {@link #matchPdfOperator(Object, String)}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is the expected operator, false
 *         otherwise.
 * @see #BEGIN_TEXT_OBJECT
 * @see #matchPdfOperator(Object, String)
 */
public static boolean isBeginTextObject(Object candidateToken) {
  final boolean matches = matchPdfOperator(candidateToken, BEGIN_TEXT_OBJECT);
  return matches;
}
/**
 * Checks whether the token at the given index is
 * {@link #END_TEXT_OBJECT}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate token.
 * @return True if the index is within the list and the selected token
 *         is the expected operator, false otherwise.
 * @see #isEndTextObject(Object)
 */
public static boolean isEndTextObject(List tokens,
                                      int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to match
  }
  return isEndTextObject(tokens.get(index));
}
/**
 * Checks whether the given token is {@link #END_TEXT_OBJECT}, by
 * delegating to {@link #matchPdfOperator(Object, String)}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is the expected operator, false
 *         otherwise.
 * @see #END_TEXT_OBJECT
 * @see #matchPdfOperator(Object, String)
 */
public static boolean isEndTextObject(Object candidateToken) {
  final boolean matches = matchPdfOperator(candidateToken, END_TEXT_OBJECT);
  return matches;
}
/**
 * Checks whether the token at the given index is
 * {@link #MOVE_TO_NEXT_LINE_SHOW_TEXT}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate token.
 * @return True if the index is within the list and the selected token
 *         is the expected operator, false otherwise.
 * @see #isMoveToNextLineShowText(Object)
 */
public static boolean isMoveToNextLineShowText(List tokens,
                                               int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to match
  }
  return isMoveToNextLineShowText(tokens.get(index));
}
/**
 * Checks whether the given token is
 * {@link #MOVE_TO_NEXT_LINE_SHOW_TEXT}, by delegating to
 * {@link #matchPdfOperator(Object, String)}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is the expected operator, false
 *         otherwise.
 * @see #MOVE_TO_NEXT_LINE_SHOW_TEXT
 * @see #matchPdfOperator(Object, String)
 */
public static boolean isMoveToNextLineShowText(Object candidateToken) {
  final boolean matches = matchPdfOperator(candidateToken, MOVE_TO_NEXT_LINE_SHOW_TEXT);
  return matches;
}
/**
 * Checks whether the token at the given index is a PDF float token.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @return True if the index is within the list and the selected token
 *         is a PDF float, false otherwise.
 * @see #isPdfFloat(Object)
 */
public static boolean isPdfFloat(List tokens,
                                 int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to inspect
  }
  return isPdfFloat(tokens.get(index));
}
/**
 * Checks whether a candidate PDF token is a PDF float token, that is,
 * an instance of {@link COSFloat}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is a PDF float, false otherwise
 *         (including for null).
 * @see COSFloat
 */
public static boolean isPdfFloat(Object candidateToken) {
  return COSFloat.class.isInstance(candidateToken);
}
/**
 * Checks whether the token at the given index is a PDF integer token.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @return True if the index is within the list and the selected token
 *         is a PDF integer, false otherwise.
 * @see #isPdfInteger(Object)
 */
public static boolean isPdfInteger(List tokens,
                                   int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to inspect
  }
  return isPdfInteger(tokens.get(index));
}
/**
 * Checks whether a candidate PDF token is a PDF integer token, that
 * is, an instance of {@link COSInteger}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is a PDF integer, false otherwise
 *         (including for null).
 * @see COSInteger
 */
public static boolean isPdfInteger(Object candidateToken) {
  return COSInteger.class.isInstance(candidateToken);
}
/**
 * Checks whether the token at the given index is a PDF number token.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @return True if the index is within the list and the selected token
 *         is a PDF number, false otherwise.
 * @see #isPdfNumber(Object)
 */
public static boolean isPdfNumber(List tokens,
                                  int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to inspect
  }
  return isPdfNumber(tokens.get(index));
}
/**
 * Checks whether a candidate PDF token is a PDF number.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is a PDF float or a PDF integer,
 *         false otherwise.
 * @see #isPdfFloat(Object)
 * @see #isPdfInteger(Object)
 */
public static boolean isPdfNumber(Object candidateToken) {
  if (isPdfFloat(candidateToken)) {
    return true;
  }
  return isPdfInteger(candidateToken);
}
/**
 * Checks whether the token at the given index is a PDF string token.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @return True if the index is within the list and the selected token
 *         is a PDF string, false otherwise.
 * @see #isPdfString(Object)
 */
public static boolean isPdfString(List tokens,
                                  int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to inspect
  }
  return isPdfString(tokens.get(index));
}
/**
 * Checks whether a candidate PDF token is a PDF string token, that is,
 * an instance of {@link COSString}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is a PDF string, false otherwise
 *         (including for null).
 * @see COSString
 */
public static boolean isPdfString(Object candidateToken) {
  return COSString.class.isInstance(candidateToken);
}
/**
 * Checks whether the token at the given index is
 * {@link #SET_RGB_COLOR_NONSTROKING}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate token.
 * @return True if the index is within the list and the selected token
 *         is the expected operator, false otherwise.
 * @see #isSetRgbColorNonStroking(Object)
 */
public static boolean isSetRgbColorNonStroking(List tokens,
                                               int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to match
  }
  return isSetRgbColorNonStroking(tokens.get(index));
}
/**
 * Checks whether the given token is
 * {@link #SET_RGB_COLOR_NONSTROKING}, by delegating to
 * {@link #matchPdfOperator(Object, String)}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is the expected operator, false
 *         otherwise.
 * @see #SET_RGB_COLOR_NONSTROKING
 * @see #matchPdfOperator(Object, String)
 */
public static boolean isSetRgbColorNonStroking(Object candidateToken) {
  final boolean matches = matchPdfOperator(candidateToken, SET_RGB_COLOR_NONSTROKING);
  return matches;
}
/**
 * Checks whether the token at the given index is
 * {@link #SET_SPACING_MOVE_TO_NEXT_LINE_SHOW_TEXT}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate token.
 * @return True if the index is within the list and the selected token
 *         is the expected operator, false otherwise.
 * @see #isSetSpacingMoveToNextLineShowText(Object)
 */
public static boolean isSetSpacingMoveToNextLineShowText(List tokens,
                                                         int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to match
  }
  return isSetSpacingMoveToNextLineShowText(tokens.get(index));
}
/**
 * Checks whether the given token is
 * {@link #SET_SPACING_MOVE_TO_NEXT_LINE_SHOW_TEXT}, by delegating to
 * {@link #matchPdfOperator(Object, String)}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is the expected operator, false
 *         otherwise.
 * @see #SET_SPACING_MOVE_TO_NEXT_LINE_SHOW_TEXT
 * @see #matchPdfOperator(Object, String)
 */
public static boolean isSetSpacingMoveToNextLineShowText(Object candidateToken) {
  final boolean matches =
      matchPdfOperator(candidateToken, SET_SPACING_MOVE_TO_NEXT_LINE_SHOW_TEXT);
  return matches;
}
/**
 * Checks whether the token at the given index is
 * {@link #SET_TEXT_MATRIX}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate token.
 * @return True if the index is within the list and the selected token
 *         is the expected operator, false otherwise.
 * @see #isSetTextMatrix(Object)
 */
public static boolean isSetTextMatrix(List tokens,
                                      int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to match
  }
  return isSetTextMatrix(tokens.get(index));
}
/**
 * Checks whether the given token is {@link #SET_TEXT_MATRIX}, by
 * delegating to {@link #matchPdfOperator(Object, String)}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is the expected operator, false
 *         otherwise.
 * @see #SET_TEXT_MATRIX
 * @see #matchPdfOperator(Object, String)
 */
public static boolean isSetTextMatrix(Object candidateToken) {
  final boolean matches = matchPdfOperator(candidateToken, SET_TEXT_MATRIX);
  return matches;
}
/**
 * Checks whether the token at the given index is {@link #SHOW_TEXT}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate token.
 * @return True if the index is within the list and the selected token
 *         is the expected operator, false otherwise.
 * @see #isShowText(Object)
 */
public static boolean isShowText(List tokens,
                                 int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to match
  }
  return isShowText(tokens.get(index));
}
/**
 * Checks whether the given token is {@link #SHOW_TEXT}, by delegating
 * to {@link #matchPdfOperator(Object, String)}.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is the expected operator, false
 *         otherwise.
 * @see #SHOW_TEXT
 * @see #matchPdfOperator(Object, String)
 */
public static boolean isShowText(Object candidateToken) {
  final boolean matches = matchPdfOperator(candidateToken, SHOW_TEXT);
  return matches;
}
/**
 * Checks whether the token at the given index is
 * {@link #SHOW_TEXT_GLYPH_POSITIONING}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate token.
 * @return True if the index is within the list and the selected token
 *         is the expected operator, false otherwise.
 * @see #isShowTextGlyphPositioning(Object)
 */
public static boolean isShowTextGlyphPositioning(List tokens,
                                                 int index) {
  if (index < 0 || index >= tokens.size()) {
    return false; // out of range: nothing to match
  }
  return isShowTextGlyphPositioning(tokens.get(index));
}
/**
 * Tests whether a single token is the
 * {@link #SHOW_TEXT_GLYPH_POSITIONING} operator.
 * @param candidateToken A candidate PDF token.
 * @return True if the argument is the expected operator, false
 *         otherwise.
 * @see #SHOW_TEXT_GLYPH_POSITIONING
 * @see #matchPdfOperator(Object, String)
 */
public static boolean isShowTextGlyphPositioning(Object candidateToken) {
  final boolean isExpectedOperator =
      matchPdfOperator(candidateToken, SHOW_TEXT_GLYPH_POSITIONING);
  return isExpectedOperator;
}
/**
 * Tests whether the token at the given index is a PDF float
 * holding the given value.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @param num    A value to match the token against.
 * @return True if the index lies within the bounds of the list and the
 *         selected token is a PDF float whose value equals the given
 *         value, false otherwise.
 * @see #matchPdfFloat(Object, float)
 */
public static boolean matchPdfFloat(List tokens,
                                    int index,
                                    float num) {
  // An out-of-range index is simply "not a match", never an error.
  if (index < 0 || index >= tokens.size()) {
    return false;
  }
  return matchPdfFloat(tokens.get(index), num);
}
/**
 * Tests whether a single token is a PDF float holding the given
 * value. The comparison is an exact <code>==</code> on the value
 * returned by {@link #getPdfFloat(Object)}.
 * @param candidateToken A candidate PDF token.
 * @param num            A value to match the token against.
 * @return True if the argument is a PDF float and its value
 *         is equal to the given value, false otherwise.
 * @see #isPdfFloat(Object)
 * @see #getPdfFloat(Object)
 */
public static boolean matchPdfFloat(Object candidateToken,
                                    float num) {
  // Only extract the value once the token is known to be a PDF float.
  if (!isPdfFloat(candidateToken)) {
    return false;
  }
  return getPdfFloat(candidateToken) == num;
}
/**
 * Tests whether the token at the given index is a PDF integer
 * holding the given value.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @param num    A value to match the token against.
 * @return True if the index lies within the bounds of the list and the
 *         selected token is a PDF integer whose value equals the given
 *         value, false otherwise.
 * @see #matchPdfInteger(Object, int)
 */
public static boolean matchPdfInteger(List tokens,
                                      int index,
                                      int num) {
  // An out-of-range index is simply "not a match", never an error.
  if (index < 0 || index >= tokens.size()) {
    return false;
  }
  return matchPdfInteger(tokens.get(index), num);
}
/**
 * Tests whether a single token is a PDF integer holding the given
 * value.
 * @param candidateToken A candidate PDF token.
 * @param num            A value to match the token against.
 * @return True if the argument is a PDF integer and its value
 *         is equal to the given value, false otherwise.
 * @see #isPdfInteger(Object)
 * @see #getPdfInteger(Object)
 */
public static boolean matchPdfInteger(Object candidateToken,
                                      int num) {
  // Only extract the value once the token is known to be a PDF integer.
  if (!isPdfInteger(candidateToken)) {
    return false;
  }
  return getPdfInteger(candidateToken) == num;
}
/**
 * Tests whether the token at the given index is a PDF number
 * holding the given value (expressed as a float).
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @param num    A value to match the token against.
 * @return True if the index lies within the bounds of the list and the
 *         selected token is a PDF number whose value equals the given
 *         value, false otherwise.
 * @see #matchPdfNumber(Object, float)
 */
public static boolean matchPdfNumber(List tokens,
                                     int index,
                                     float num) {
  // An out-of-range index is simply "not a match", never an error.
  if (index < 0 || index >= tokens.size()) {
    return false;
  }
  return matchPdfNumber(tokens.get(index), num);
}
/**
 * Tests whether a single token is a PDF number holding the given
 * value (expressed as a float). The comparison is an exact
 * <code>==</code> on the value returned by
 * {@link #getPdfNumber(Object)}.
 * @param candidateToken A candidate PDF token.
 * @param num            A value to match the token against.
 * @return True if the argument is a PDF number and its value
 *         is equal to the given value, false otherwise.
 * @see #isPdfNumber(Object)
 * @see #getPdfNumber(Object)
 */
public static boolean matchPdfNumber(Object candidateToken,
                                     float num) {
  // Only extract the value once the token is known to be a PDF number.
  if (!isPdfNumber(candidateToken)) {
    return false;
  }
  return getPdfNumber(candidateToken) == num;
}
/**
 * Tests whether the token at the given index is a PDF operator,
 * and if so, whether it is the expected operator.
 * @param tokens           A list of tokens.
 * @param index            The index of the selected token.
 * @param expectedOperator A PDF operator string to match the token
 *                         against.
 * @return True if the index lies within the bounds of the list and the
 *         selected token is a PDF operator of the expected type, false
 *         otherwise.
 * @see #matchPdfOperator(Object, String)
 */
public static boolean matchPdfOperator(List tokens,
                                       int index,
                                       String expectedOperator) {
  // An out-of-range index is simply "not a match", never an error.
  if (index < 0 || index >= tokens.size()) {
    return false;
  }
  return matchPdfOperator(tokens.get(index), expectedOperator);
}
/**
 * Tests whether a PDF token is a PDF operator, and if so,
 * whether it is the expected operator.
 * @param candidateToken   A candidate PDF token.
 * @param expectedOperator A PDF operator string to match the token
 *                         against.
 * @return True if the argument is a {@link PDFOperator} whose operation
 *         string equals the expected one, false otherwise.
 */
public static boolean matchPdfOperator(Object candidateToken,
                                       String expectedOperator) {
  // Anything that is not an operator (including null) cannot match.
  if (!(candidateToken instanceof PDFOperator)) {
    return false;
  }
  final PDFOperator operator = (PDFOperator)candidateToken;
  return operator.getOperation().equals(expectedOperator);
}
/**
 * Tests whether the token at the given index is a PDF string
 * equal to the given value.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @param str    A value to match the token against with
 *               {@link String#equals}.
 * @return True if the index lies within the bounds of the list and the
 *         selected token is a PDF string whose value equals the given
 *         value, false otherwise.
 * @see #matchPdfString(Object, String)
 */
public static boolean matchPdfString(List tokens,
                                     int index,
                                     String str) {
  // An out-of-range index is simply "not a match", never an error.
  if (index < 0 || index >= tokens.size()) {
    return false;
  }
  return matchPdfString(tokens.get(index), str);
}
/**
 * Tests whether a single token is a PDF string equal to the
 * given value.
 * @param candidateToken A candidate PDF token.
 * @param str            A value to match the token against with
 *                       {@link String#equals}.
 * @return True if the argument is a PDF string and its value
 *         is equal to the given value, false otherwise.
 * @see #isPdfString(Object)
 * @see #getPdfString(Object)
 */
public static boolean matchPdfString(Object candidateToken,
                                     String str) {
  // Only extract the value once the token is known to be a PDF string.
  if (!isPdfString(candidateToken)) {
    return false;
  }
  return getPdfString(candidateToken).equals(str);
}
/**
 * Tests whether the token at the given index is a PDF string
 * ending with the given value.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @param str    A value to match the token against with
 *               {@link String#endsWith(String)}.
 * @return True if the index lies within the bounds of the list and the
 *         selected token is a PDF string whose value ends with the
 *         given value, false otherwise.
 * @see #matchPdfStringEndsWith(Object, String)
 */
public static boolean matchPdfStringEndsWith(List tokens,
                                             int index,
                                             String str) {
  // An out-of-range index is simply "not a match", never an error.
  if (index < 0 || index >= tokens.size()) {
    return false;
  }
  return matchPdfStringEndsWith(tokens.get(index), str);
}
/**
 * Tests whether a single token is a PDF string ending with the
 * given value.
 * @param candidateToken A candidate PDF token.
 * @param str            A value to match the token against with
 *                       {@link String#endsWith(String)}.
 * @return True if the argument is a PDF string and its value
 *         ends with the given value, false otherwise.
 * @see #isPdfString(Object)
 * @see #getPdfString(Object)
 */
public static boolean matchPdfStringEndsWith(Object candidateToken,
                                             String str) {
  // Only extract the value once the token is known to be a PDF string.
  if (!isPdfString(candidateToken)) {
    return false;
  }
  return getPdfString(candidateToken).endsWith(str);
}
/**
 * Tests whether the token at the given index is a PDF string
 * matching the given regular expression.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @param regex  A regular expression to match the token against with
 *               {@link String#matches(String)}.
 * @return True if the index lies within the bounds of the list and the
 *         selected token is a PDF string whose value matches the given
 *         regular expression, false otherwise.
 * @see #matchPdfStringMatches(Object, String)
 */
public static boolean matchPdfStringMatches(List tokens,
                                            int index,
                                            String regex) {
  // An out-of-range index is simply "not a match", never an error.
  if (index < 0 || index >= tokens.size()) {
    return false;
  }
  return matchPdfStringMatches(tokens.get(index), regex);
}
/**
 * Tests whether a single token is a PDF string matching the
 * given regular expression.
 * @param candidateToken A candidate PDF token.
 * @param regex          A regular expression to match the token against
 *                       with {@link String#matches(String)}.
 * @return True if the argument is a PDF string and its value
 *         matches the given regular expression, false otherwise.
 * @see #isPdfString(Object)
 * @see #getPdfString(Object)
 */
public static boolean matchPdfStringMatches(Object candidateToken,
                                            String regex) {
  // Only extract the value once the token is known to be a PDF string.
  if (!isPdfString(candidateToken)) {
    return false;
  }
  return getPdfString(candidateToken).matches(regex);
}
/**
 * Tests whether the token at the given index is a PDF string
 * starting with the given value.
 * @param tokens A list of tokens.
 * @param index  The index of the selected token.
 * @param str    A value to match the token against with
 *               {@link String#startsWith(String)}.
 * @return True if the index lies within the bounds of the list and the
 *         selected token is a PDF string whose value starts with the
 *         given value, false otherwise.
 * @see #matchPdfStringStartsWith(Object, String)
 */
public static boolean matchPdfStringStartsWith(List tokens,
                                               int index,
                                               String str) {
  // An out-of-range index is simply "not a match", never an error.
  if (index < 0 || index >= tokens.size()) {
    return false;
  }
  return matchPdfStringStartsWith(tokens.get(index), str);
}
/**
 * Tests whether a single token is a PDF string starting with the
 * given value.
 * @param candidateToken A candidate PDF token.
 * @param str            A value to match the token against with
 *                       {@link String#startsWith(String)}.
 * @return True if the argument is a PDF string and its value
 *         starts with the given value, false otherwise.
 * @see #isPdfString(Object)
 * @see #getPdfString(Object)
 */
public static boolean matchPdfStringStartsWith(Object candidateToken,
                                               String str) {
  // Only extract the value once the token is known to be a PDF string.
  if (!isPdfString(candidateToken)) {
    return false;
  }
  return getPdfString(candidateToken).startsWith(str);
}
/**
 * Tests whether the token at the given index is accepted by
 * {@link #isSetRgbColorNonStroking(List, int)} and the three tokens
 * immediately before it are PDF numbers equal to the given red,
 * green and blue components, in that order.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate operator token; the
 *               operands are looked up at <code>index - 3</code>,
 *               <code>index - 2</code> and <code>index - 1</code>.
 * @param red    The value expected at <code>index - 3</code>.
 * @param green  The value expected at <code>index - 2</code>.
 * @param blue   The value expected at <code>index - 1</code>.
 * @return True if the operator and all three operands match, false
 *         otherwise.
 * @see #isSetRgbColorNonStroking(List, int)
 * @see #matchPdfNumber(List, int, float)
 */
public static boolean matchSetRgbColorNonStroking(List tokens,
                                                  int index,
                                                  float red,
                                                  float green,
                                                  float blue) {
  // The operator is checked first; operands are only inspected after it.
  if (!isSetRgbColorNonStroking(tokens, index)) {
    return false;
  }
  if (!matchPdfNumber(tokens, index - 3, red)) {
    return false;
  }
  if (!matchPdfNumber(tokens, index - 2, green)) {
    return false;
  }
  return matchPdfNumber(tokens, index - 1, blue);
}
/**
 * Integer convenience overload: converts the three components to
 * <code>float</code> and delegates to
 * {@link #matchSetRgbColorNonStroking(List, int, float, float, float)}.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate operator token.
 * @param red    The expected red component.
 * @param green  The expected green component.
 * @param blue   The expected blue component.
 * @return Whatever the <code>float</code> overload returns for the
 *         converted components.
 * @see #matchSetRgbColorNonStroking(List, int, float, float, float)
 */
public static boolean matchSetRgbColorNonStroking(List tokens,
                                                  int index,
                                                  int red,
                                                  int green,
                                                  int blue) {
  // float-typed locals select the float overload, so this does not
  // call itself recursively.
  final float redComponent = red;
  final float greenComponent = green;
  final float blueComponent = blue;
  return matchSetRgbColorNonStroking(tokens,
                                     index,
                                     redComponent,
                                     greenComponent,
                                     blueComponent);
}
/**
 * Tests whether the token at the given index is
 * {@link #SET_TEXT_MATRIX} and the six tokens immediately before it
 * (at <code>index - 6</code> through <code>index - 1</code>) are all
 * PDF numbers.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate operator token.
 * @return True if the operator matches and all six preceding tokens
 *         are PDF numbers, false otherwise.
 * @see #isSetTextMatrix(List, int)
 * @see #isPdfNumber(List, int)
 */
public static boolean matchSetTextMatrix(List tokens,
                                         int index) {
  if (!isSetTextMatrix(tokens, index)) {
    return false;
  }
  // Check the operands from the farthest (index - 6) to the nearest
  // (index - 1), stopping at the first one that is not a PDF number.
  for (int offset = 6; offset >= 1; offset--) {
    if (!isPdfNumber(tokens, index - offset)) {
      return false;
    }
  }
  return true;
}
/**
 * Tests whether the token at the given index is {@link #SHOW_TEXT}
 * and the token immediately before it is a PDF string.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate operator token; its
 *               operand is looked up at <code>index - 1</code>.
 * @return True if the operator matches and its operand is a PDF
 *         string, false otherwise.
 * @see #isShowText(List, int)
 * @see #isPdfString(List, int)
 */
public static boolean matchShowText(List tokens,
                                    int index) {
  // The operator is checked first; the operand is only inspected after it.
  if (!isShowText(tokens, index)) {
    return false;
  }
  return isPdfString(tokens, index - 1);
}
/**
 * Tests whether the token at the given index is {@link #SHOW_TEXT}
 * and the token immediately before it is a PDF string equal to the
 * given value.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate operator token; its
 *               operand is looked up at <code>index - 1</code>.
 * @param str    The value the operand is compared against.
 * @return True if the operator matches and its operand equals the
 *         given value, false otherwise.
 * @see #isShowText(List, int)
 * @see #matchPdfString(List, int, String)
 */
public static boolean matchShowText(List tokens,
                                    int index,
                                    String str) {
  // The operator is checked first; the operand is only inspected after it.
  if (!isShowText(tokens, index)) {
    return false;
  }
  return matchPdfString(tokens, index - 1, str);
}
/**
 * Tests whether the token at the given index is {@link #SHOW_TEXT}
 * and the token immediately before it is a PDF string ending with
 * the given value.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate operator token; its
 *               operand is looked up at <code>index - 1</code>.
 * @param str    The suffix the operand is tested for.
 * @return True if the operator matches and its operand ends with the
 *         given value, false otherwise.
 * @see #isShowText(List, int)
 * @see #matchPdfStringEndsWith(List, int, String)
 */
public static boolean matchShowTextEndsWith(List tokens,
                                            int index,
                                            String str) {
  // The operator is checked first; the operand is only inspected after it.
  if (!isShowText(tokens, index)) {
    return false;
  }
  return matchPdfStringEndsWith(tokens, index - 1, str);
}
/**
 * Tests whether the token at the given index is {@link #SHOW_TEXT}
 * and the token immediately before it is a PDF string matching the
 * given regular expression.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate operator token; its
 *               operand is looked up at <code>index - 1</code>.
 * @param regex  The regular expression the operand is matched against.
 * @return True if the operator matches and its operand matches the
 *         given regular expression, false otherwise.
 * @see #isShowText(List, int)
 * @see #matchPdfStringMatches(List, int, String)
 */
public static boolean matchShowTextMatches(List tokens,
                                           int index,
                                           String regex) {
  // The operator is checked first; the operand is only inspected after it.
  if (!isShowText(tokens, index)) {
    return false;
  }
  return matchPdfStringMatches(tokens, index - 1, regex);
}
/**
 * Tests whether the token at the given index is {@link #SHOW_TEXT}
 * and the token immediately before it is a PDF string starting with
 * the given value.
 * @param tokens A list of tokens.
 * @param index  The index of the candidate operator token; its
 *               operand is looked up at <code>index - 1</code>.
 * @param str    The prefix the operand is tested for.
 * @return True if the operator matches and its operand starts with the
 *         given value, false otherwise.
 * @see #isShowText(List, int)
 * @see #matchPdfStringStartsWith(List, int, String)
 */
public static boolean matchShowTextStartsWith(List tokens,
                                              int index,
                                              String str) {
  // The operator is checked first; the operand is only inspected after it.
  if (!isShowText(tokens, index)) {
    return false;
  }
  return matchPdfStringStartsWith(tokens, index - 1, str);
}
/**
 * Tests whether the tokens at the two given indices delimit a text
 * object, i.e. whether they are {@link #BEGIN_TEXT_OBJECT} and
 * {@link #END_TEXT_OBJECT} respectively. The tokens between the two
 * indices are not examined.
 * @param tokens A list of PDF tokens.
 * @param begin  The index of the selected {@link #BEGIN_TEXT_OBJECT}
 *               candidate.
 * @param end    The index of the selected {@link #END_TEXT_OBJECT}
 *               candidate.
 * @return True if the selected tokens are the expected operators,
 *         false otherwise.
 * @see #isBeginTextObject(List, int)
 * @see #isEndTextObject(List, int)
 */
public static boolean matchTextObject(List tokens,
                                      int begin,
                                      int end) {
  // The end candidate is only inspected once the begin candidate matches.
  if (!isBeginTextObject(tokens, begin)) {
    return false;
  }
  return isEndTextObject(tokens, end);
}
}