org.jpedal.examples.javafx.ExtractPagesAsJavaFX Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jsign-jpedal Show documentation
JPedal fork
The newest version!
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/java-pdf-library-support/
 *
 * (C) Copyright 1997-2013, IDRsolutions and Contributors.
 *
 * 	This file is part of JPedal
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * ExtractPagesAsJavaFX.java
 * ---------------
 */

/**
 *
 * This example opens a pdf file and extracts the JavaFX version of each page
 */
package org.jpedal.examples.javafx;

import java.awt.Rectangle;
import java.awt.geom.Point2D;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.jar.Attributes;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;

import javax.tools.JavaCompiler;
import javax.tools.JavaFileObject;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;

import org.jpedal.PdfDecoder;
import org.jpedal.external.Options;
import org.jpedal.fonts.FontMappings;
import org.jpedal.io.ObjectStore;
import org.jpedal.render.DynamicVectorRenderer;
import org.jpedal.render.output.GenericFontMapper;
import org.jpedal.render.output.OutputDisplay;
import org.jpedal.render.output.javafx.FXMLDisplay;
import org.jpedal.render.output.javafx.JavaFXDisplay;
import org.jpedal.utils.LogWriter;

public class ExtractPagesAsJavaFX {

	/**
	 * JVM working directory ("user.dir" system property). extraction() appends a trailing separator to it.
	 * NOTE(review): the value is not used for any output path in the code visible in this class.
	 */
	private String user_dir = System.getProperty("user.dir");

	/** flag to show if we print progress messages to System.out */
	public static boolean outputMessages = false;

	/** root directory the generated output is written into - set from the command line in setParams() and by extraction() */
	String output_dir = null;

	/** correct separator for OS */
	String separator = System.getProperty("file.separator");

	/** the decoder object which decodes the pdf and returns a data object (a new one is created for every file in decodeFile()) */
	PdfDecoder decode_pdf = null;

	/**
	 * flag to show if using images at highest quality - switch on with command line flag Dhires.
	 * NOTE(review): never read or written in the code visible in this class.
	 */
	private boolean useHiresImage = false;

	/**
	 * sample file which can be setup - substitute your own. If a directory is given, all the files in the directory will be processed
	 */
	private String test_file = "/mnt/shared/sample_pdfs/general/World Factbook.pdf";

	/**
	 * test mode: limits conversion to the first 10 pages, skips the font replacement setup and suppresses the wrong password exception
	 */
	public static boolean isTest = false;

	/** file password. Defaults to an empty string, so decodeFile() uses the password variant of openPdfFile unless this is null */
	private String password = "";

	// alt name for first page (ie index) - read from the org.jpedal.pdf2javafx.firstPageName system property in extraction()
	private String firstPageName = null;

	/** Output an XML file containing the fonts in this extraction for the user to edit **/
	private static boolean createTemplate = false;
	// target of the XML template; init() replaces this initial value with the org.jpedal.saveXML system property (possibly null)
	private static String saveTemplateFileName = "/Users/markee/Desktop/test.xml";

	// true when init() found an existing file named by the org.jpedal.loadXML system property
	private static boolean loadTemplate = false;
	// value of the org.jpedal.loadXML system property (may be null)
	private static String loadTemplateFileName;

	// Alternate between JavaFX and FXML
	private boolean outputAsFXML = false;

	// end = last page number to convert, page = page currently being converted (exposed via getPageCount() / getPageReached())
	int end, page;

	/** used by IDEs to exit on request - set by stopConversion() and checked after each page has been decoded */
	private boolean exitRequested;

	// page count of the most recently opened PDF; used to pad the default first page name when the JAR is created
	private int numPages = -1; // Used when JAR created.

	/**
	 * Creates a converter without starting a conversion.
	 * <p>
	 * Only the static font template settings are refreshed from the system properties; use {@link #extraction(String, String)} to
	 * convert a file afterwards.
	 */
	public ExtractPagesAsJavaFX() {
		ExtractPagesAsJavaFX.init();
	}

	/**
	 * Command line style constructor: converts the PDF (or every PDF inside a directory) named in <code>args</code> and packages each
	 * converted document as a runnable JAR.
	 * <p>
	 * If no output directory was passed in, the JVM working directory is used so that the generated sources, the compiler and the JAR
	 * builder all work on the same location.
	 * 
	 * @param args
	 *            file or directory name, optionally followed by an output directory (must end with / or \) and/or a password
	 */
	public ExtractPagesAsJavaFX(String[] args) {

		init();

		// read all values passed in by user and setup
		String file_name = setParams(args);

		// check file exists
		File pdf_file = new File(file_name);

		if (!pdf_file.exists()) {
			System.out.println("File " + pdf_file + " not found");
			System.out.println("May need full path");

			return;
		}

		// allow user to set a JVM flag to enable FXML instead of JavaFX (parseBoolean is null safe and ignores case)
		if (Boolean.parseBoolean(System.getProperty("org.jpedal.pdf2javafx.outputAsFXML"))) this.outputAsFXML = true;

		// without an output directory every path below would start with the text "null"
		if (this.output_dir == null) {
			this.output_dir = this.user_dir.endsWith(this.separator) ? this.user_dir : this.user_dir + this.separator;
		}

		// same file/directory test as extraction() uses
		if (file_name.toLowerCase().endsWith(".pdf")) {
			convertAndPackage(file_name, pdf_file.getName());
			return;
		}

		String[] children = pdf_file.list();
		if (children == null) {
			System.err.println(file_name + " is not a directory. Exiting program");
			return;
		}

		// each PDF in the directory gets its own output folder and JAR
		String directory = file_name.endsWith(this.separator) ? file_name : file_name + this.separator;
		for (String child : children) {
			if (child.toLowerCase().endsWith(".pdf") && !child.startsWith(".")) {
				convertAndPackage(directory + child, child);
			}
		}
	}

	/**
	 * Converts one PDF, compiles the generated sources, wraps them in an executable JAR and removes the .class files again.
	 * 
	 * @param pdfPath
	 *            full path of the PDF file
	 * @param pdfName
	 *            file name of the PDF without any directory; must end with a 4 character extension such as ".pdf"
	 */
	private void convertAndPackage(String pdfPath, String pdfName) {

		extraction(pdfPath, this.output_dir);

		String baseName = pdfName.substring(0, pdfName.length() - 4);

		// Online converter doesn't need FX or FXML prepended to the name.
		String name;
		if (System.getProperty("IsOnlineConverter") != null) name = getStrippedText(baseName);
		else name = convertPDFName(baseName, !this.outputAsFXML);

		// Compile all .java files
		compile(this.output_dir, name);

		// extraction() resets firstPageName from the system property, so a missing value is rebuilt for every document:
		// "page" followed by the page number 1 padded with zeros to the width of the page count
		if (this.firstPageName == null) {
			StringBuilder defaultName = new StringBuilder("page");
			int digits = String.valueOf(this.numPages).length();
			for (int i = 1; i < digits; i++) {
				defaultName.append('0');
			}
			this.firstPageName = defaultName.append('1').toString();
		}

		// Make an executable JAR file
		try {
			mkJar(this.output_dir, name, this.firstPageName);
		}
		catch (IOException e) {
			// tell user and log
			if (LogWriter.isOutput()) LogWriter.writeLog("Exception: " + e.getMessage());
		}

		// Clean up all those .class files that got created
		tidyUpClassFiles(this.output_dir, name);
	}

	/**
	 * Deletes the compiled .class files from the output folder of a converted document.
	 * 
	 * @param dir
	 *            root output directory
	 * @param name
	 *            name of the document sub-folder inside <code>dir</code>
	 */
	private static void tidyUpClassFiles(String dir, String name) {
		File[] contents = new File(dir, name).listFiles();

		// listFiles() returns null if the folder does not exist or cannot be read - nothing to tidy up then
		if (contents == null) return;

		for (File f : contents) {
			if (!f.getName().endsWith(".class")) continue;

			// a failed delete is not fatal, but should not pass unnoticed
			if (!f.delete() && LogWriter.isOutput()) LogWriter.writeLog("Unable to delete " + f.getAbsolutePath());
		}
	}

	/**
	 * Builds <code>dir + name + ".jar"</code> from the contents of the folder <code>name</code> inside <code>dir</code>.
	 * 
	 * @param dir
	 *            root output directory
	 * @param name
	 *            name of the document folder, also used as the package of the main class
	 * @param firstPageName
	 *            simple name of the class to run when the JAR is started
	 * @throws IOException
	 *             if the JAR cannot be written or a file cannot be read
	 */
	private static void mkJar(String dir, String name, String firstPageName) throws IOException {
		Manifest manifest = new Manifest();
		Attributes mainAttributes = manifest.getMainAttributes();
		mainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0");
		// Main-Class is documented as a class name, so separate package and class with '.' rather than '/'
		mainAttributes.put(Attributes.Name.MAIN_CLASS, name + '.' + firstPageName);

		FileOutputStream fileOut = new FileOutputStream(dir + name + ".jar");
		try {
			JarOutputStream target = new JarOutputStream(fileOut, manifest);
			try {
				add(new File(dir, name), target, dir);
			}
			finally {
				target.close();
			}
		}
		finally {
			// releases the file handle even if the JarOutputStream could not be created (closing twice is harmless)
			fileOut.close();
		}
	}

	/**
	 * Adds a file, or recursively every file below a directory, to the JAR. Entry names are the file paths relative to <code>dir</code>
	 * with '/' as separator.
	 * 
	 * @param source
	 *            file or directory to add
	 * @param target
	 *            JAR being written
	 * @param dir
	 *            root directory which is stripped from the front of each entry name
	 * @throws IOException
	 *             if a directory cannot be listed or a file cannot be copied
	 */
	private static void add(File source, JarOutputStream target, String dir) throws IOException {

		if (source.isDirectory()) {
			File[] children = source.listFiles();
			if (children == null) throw new IOException("Unable to list contents of " + source);

			for (File nestedFile : children)
				add(nestedFile, target, dir);
			return;
		}

		// only remove the root from the START of the path - a plain replace would also hit later occurrences
		// (and strip every '/' when dir is empty)
		String root = new File(dir).getPath().replace("\\", "/") + "/";
		String entryName = source.getPath().replace("\\", "/");
		if (entryName.startsWith(root)) entryName = entryName.substring(root.length());

		JarEntry entry = new JarEntry(entryName);
		entry.setTime(source.lastModified());

		BufferedInputStream in = new BufferedInputStream(new FileInputStream(source));
		try {
			target.putNextEntry(entry);

			byte[] buffer = new byte[1024];
			int count;
			while ((count = in.read(buffer)) != -1) {
				target.write(buffer, 0, count);
			}
			target.closeEntry();
		}
		finally {
			in.close();
		}
	}

	/**
	 * Compiles every .java file found directly inside the folder <code>pdfName</code> below <code>outputDir</code> using the system Java
	 * compiler. The .class files are written next to the sources.
	 * 
	 * @param outputDir
	 *            root output directory
	 * @param pdfName
	 *            name of the document folder holding the generated sources
	 * @throws RuntimeException
	 *             if no system Java compiler is available (running on a JRE without javac)
	 */
	private static void compile(String outputDir, String pdfName) {

		File dir = new File(outputDir, pdfName);

		ArrayList<File> sources = new ArrayList<File>();

		// listFiles() returns null if the folder does not exist or cannot be read
		File[] contents = dir.listFiles();
		if (contents != null) {
			for (File f : contents) {
				if (f.getAbsolutePath().endsWith(".java")) {
					sources.add(f);
				}
			}
		}

		JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
		if (compiler == null) throw new RuntimeException("Jar could not be created as Java version requires javac.");

		if (sources.isEmpty()) {
			if (LogWriter.isOutput()) LogWriter.writeLog("No .java files to compile in " + dir.getAbsolutePath());
			return;
		}

		StandardJavaFileManager fileManager = compiler.getStandardFileManager(null, null, null);
		try {
			Iterable<? extends JavaFileObject> compilationUnits = fileManager.getJavaFileObjectsFromFiles(sources);

			String[] compileOptions;
			if (System.getProperty("IsOnlineConverter") != null) {
				compileOptions = new String[] { "-encoding", "UTF-8", "-classpath", "../jfxrt.jar" };
			}
			else {
				compileOptions = new String[] { "-encoding", "UTF-8" };
			}
			Iterable<String> options = Arrays.asList(compileOptions);

			// call() reports failure through its return value, not through an exception
			Boolean succeeded = compiler.getTask(null, fileManager, null, options, null, compilationUnits).call();
			if (!Boolean.TRUE.equals(succeeded) && LogWriter.isOutput()) {
				LogWriter.writeLog("Compilation of generated sources failed in " + dir.getAbsolutePath());
			}
		}
		finally {
			try {
				fileManager.close();
			}
			catch (IOException e) {
				// tell user and log
				if (LogWriter.isOutput()) LogWriter.writeLog("Exception: " + e.getMessage());
			}
		}
	}

	/**
	 * Refreshes the static font template settings from the system properties:
	 * <code>org.jpedal.loadXML</code> names a mapping file to load (only used if the file exists) and
	 * <code>org.jpedal.saveXML</code> names a template file to write (used whenever the property is set).
	 */
	private static void init() {

		String loadName = System.getProperty("org.jpedal.loadXML");
		loadTemplateFileName = loadName;
		loadTemplate = loadName != null && new File(loadName).exists();

		String saveName = System.getProperty("org.jpedal.saveXML");
		saveTemplateFileName = saveName;
		createTemplate = saveName != null;
	}

	/**
	 * Converts one PDF file, or every PDF directly inside a directory, by handing each file to decodeFile().
	 * <p>
	 * A name ending in ".pdf" (any case) is treated as a single file; anything else is treated as a directory whose entries ending in
	 * ".pdf" and not starting with '.' are converted.
	 * 
	 * @param file_name
	 *            PDF file or directory of PDF files
	 * @param output_dir
	 *            root directory for the generated output; also stored in this object
	 */
	public void extraction(String file_name, String output_dir) {

		this.output_dir = output_dir;

		// keep the working directory terminated by a separator
		if (!this.user_dir.endsWith(this.separator)) this.user_dir += this.separator;

		// allow user to set a JVM flag to name the first page (null if not set)
		this.firstPageName = System.getProperty("org.jpedal.pdf2javafx.firstPageName");

		final boolean isSinglePdf = file_name.toLowerCase().endsWith(".pdf");

		if (isSinglePdf) {
			decodeFile(file_name, output_dir);
		}
		else {
			// make sure the name ends with a separator so file names can simply be appended
			final String directory = file_name.endsWith(this.separator) ? file_name : file_name + this.separator;

			String[] entries = null;
			try {
				File folder = new File(directory);

				if (folder.isDirectory()) entries = folder.list();
				else System.err.println(directory + " is not a directory. Exiting program");
			}
			catch (Exception ee) {
				LogWriter.writeLog("Exception trying to access file " + ee.getMessage());
			}

			if (entries != null) {
				// now work through all pdf files, ignoring hidden ones
				for (int i = 0; i < entries.length; i++) {
					final String entry = entries[i];

					if (entry.startsWith(".") || !entry.toLowerCase().endsWith(".pdf")) continue;

					if (outputMessages) System.out.println(directory + entry);

					decodeFile(directory + entry, output_dir);
				}
			}
		}

		// tell user
		if (outputMessages) System.out.println("JavaFX created");
	}

	/**
	 * Opens one PDF and, if it is readable and extraction is permitted, writes its pages out as JavaFX/FXML.
	 * <p>
	 * The output goes into a sub-folder of <code>output_dir</code> named after the PDF (stripped to java safe characters) and prefixed
	 * with "FX" or "FXML" unless the IsOnlineConverter system property is set. If the file cannot be opened the error is reported on
	 * System.err and nothing is extracted.
	 * 
	 * @param file_name
	 *            path of the PDF; expected to end with a 4 character extension such as ".pdf"
	 * @param output_dir
	 *            root directory for the output (including trailing separator)
	 * @throws RuntimeException
	 *             if the password is wrong (not in test mode), extraction is not allowed or a page fails to convert
	 */
	private void decodeFile(String file_name, String output_dir) {

		// get just the name of the file without the path or extension to use as a sub-directory
		String name = "demo"; // set a default just in case

		int pointer = file_name.lastIndexOf(this.separator);

		if (pointer == -1) pointer = file_name.lastIndexOf('/');

		if (pointer != -1) {
			name = file_name.substring(pointer + 1, file_name.length() - 4);
		}
		else if ((!ExtractPagesAsJavaFX.isTest) && (file_name.toLowerCase().endsWith(".pdf"))) {
			name = file_name.substring(0, file_name.length() - 4);
		}

		name = getStrippedText(name); // changes the name to a java safe name

		PdfDecoder decoder = null;
		boolean opened = false;

		// PdfDecoder throws an exception if there is a problem
		try {
			decoder = new PdfDecoder(true);
			this.decode_pdf = decoder;

			// mappings for non-embedded fonts to use
			if (!isTest) FontMappings.setFontReplacements();

			// open the file (and read metadata including pages in file)
			if (this.password != null) decoder.openPdfFile(file_name, this.password);
			else decoder.openPdfFile(file_name);

			this.numPages = decoder.getPageCount();
			opened = true;
		}
		catch (Exception e) {

			System.err.println("8.Exception " + e + " in pdf code in " + file_name);
		}

		// the decoder itself could not be created, so there is nothing to query or close
		if (decoder == null) return;

		// extract data from pdf (if allowed)
		if (decoder.isEncrypted() && !decoder.isFileViewable()) {
			// exit with error if not test
			if (!isTest) {
				decoder.closePdfFile();
				throw new RuntimeException("Wrong password password used=>" + this.password + '<');
			}
		}
		else if ((decoder.isEncrypted() && (!decoder.isPasswordSupplied())) && (!decoder.isExtractionAllowed())) {
			decoder.closePdfFile();
			throw new RuntimeException("Extraction not allowed");
		}
		else if (opened) { // never try to extract pages from a document which failed to open

			// The name of the file is added to the output path so that a folder containing all the elements for the pdf is
			// created. The online converter uses the plain name, otherwise the output type is used as a prefix.
			String prefix;
			if (System.getProperty("IsOnlineConverter") != null) prefix = "";
			else prefix = this.outputAsFXML ? "FXML" : "FX";

			extractPageAsJavaFX(file_name, output_dir + prefix + name + this.separator, prefix + name);
		}

		// close the pdf file
		decoder.closePdfFile();
	}

	/**
	 * @return the number of the last page selected for conversion in the most recent extraction (0 before any extraction has run)
	 */
	public int getPageCount() {
		return end;
	}

	/**
	 * @return the current value of the page counter used by the conversion loop
	 */
	public int getPageReached() {
		return page;
	}

	/**
	 * Builds the output name for a PDF: the java safe version of <code>name</code> prefixed with the output type.
	 * 
	 * @param name
	 *            PDF name without extension
	 * @param isPDFtoFX
	 *            true for JavaFX output (prefix "FX"), false for FXML output (prefix "FXML")
	 * @return prefixed, stripped name
	 */
	public static String convertPDFName(String name, boolean isPDFtoFX) {
		final String prefix = isPDFtoFX ? "FX" : "FXML";
		return prefix + getStrippedText(name);
	}

	/**
	 * Writes every page of the currently open PDF (this.decode_pdf) into <code>output_dir</code> as JavaFX or FXML, together with the
	 * navigation icons. In test mode only the first 10 pages are converted.
	 * 
	 * @param file_name
	 *            name of the PDF, only used in error messages
	 * @param output_dir
	 *            folder for this document's output; created if missing
	 * @param name
	 *            name passed to the output renderer together with the page name
	 * @throws RuntimeException
	 *             wrapping any exception raised during conversion; the PDF is closed before it is thrown
	 */
	private void extractPageAsJavaFX(String file_name, String output_dir, String name) {

		// create a directory if it doesn't exist
		if (output_dir != null) {
			File output_path = new File(output_dir);
			if (!output_path.exists()) output_path.mkdirs();
		}

		// page range
		int start = 1;
		this.end = this.decode_pdf.getPageCount();

		// limit to 1st ten pages in testing
		if (this.end > 10 && isTest) this.end = 10;

		if (outputMessages) System.out.println("JavaFX file will be in  " + output_dir);

		try {

			GenericFontMapper.setXMLTemplate(createTemplate);

			if (loadTemplate) {
				FileInputStream template = new FileInputStream(new File(loadTemplateFileName));
				try {
					GenericFontMapper.loadCustomFontMappings(template);
				}
				finally {
					template.close();
				}
			}

			// add the icons to the directory
			File iconDir = new File(output_dir + "/icons");
			if (!iconDir.exists()) iconDir.mkdirs();

			copyIcons(iconDir);

			for (this.page = start; this.page < this.end + 1; this.page++) { // read pages

				// create a name padded with zeros so that more than 9 pages appear in the correct order
				String pageAsString = String.valueOf(this.page);

				if (this.firstPageName != null && this.page == start) {
					pageAsString = this.firstPageName;
				}
				else {
					String maxPageSize = String.valueOf(this.end);
					int padding = maxPageSize.length() - pageAsString.length();
					for (int ii = 0; ii < padding; ii++)
						pageAsString = '0' + pageAsString;
				}

				if (outputMessages) System.out.println("Page " + pageAsString);

				int cropX = this.decode_pdf.getPdfPageData().getCropBoxX(this.page);
				int cropY = this.decode_pdf.getPdfPageData().getCropBoxY(this.page);
				int cropW = this.decode_pdf.getPdfPageData().getCropBoxWidth(this.page);
				int cropH = this.decode_pdf.getPdfPageData().getCropBoxHeight(this.page);

				// Create Rectangle object to match width and height of cropbox
				Rectangle cropBox = new Rectangle(0, 0, cropW, cropH);
				// Find middle of cropbox in Pdf Coordinates
				Point2D midPoint = new Point2D.Double((cropW / 2) + cropX, (cropH / 2) + cropY);

				DynamicVectorRenderer javaFXOutput;

				if (this.outputAsFXML) {
					javaFXOutput = new FXMLDisplay(this.page, midPoint, cropBox, false, 100, new ObjectStore(null));
				}
				else javaFXOutput = new JavaFXDisplay(this.page, midPoint, cropBox, false, 100, new ObjectStore(null));

				// Scaling factor so the page size can be altered. To fit a preferred size while keeping the aspect ratio, use the
				// smaller of preferredWidth/cropW and preferredHeight/cropH.
				float scaling = 1.0f;

				javaFXOutput.setValue(OutputDisplay.PercentageScaling, (int) (scaling * 100)); // set page scaling (default is 100%)
				javaFXOutput.writeCustom(OutputDisplay.PAGEDATA, this.decode_pdf.getPdfPageData()); // pass in PageData object
				javaFXOutput.setValue(OutputDisplay.MaxNumberOfDecimalPlaces, 0); // let use select max number of decimal places
				javaFXOutput.setOutputDir(output_dir, name, pageAsString); // root for output

				this.decode_pdf.addExternalHandler(javaFXOutput, Options.CustomOutput); // custom object to draw PDF

				// Set page range - Start and end of page decode
				javaFXOutput.setValue(OutputDisplay.StartOfDecode, start);
				javaFXOutput.setValue(OutputDisplay.EndOfDecode, this.end);

				// This allows the user to have a nav bar on page
				javaFXOutput.setBooleanValue(OutputDisplay.AddNavBar, true);

				// include irregular curved clips. (As used in SVG & HTML)
				javaFXOutput.setBooleanValue(OutputDisplay.IncludeClip, true);

				// get the current page as JavaFX
				this.decode_pdf.decodePage(this.page);

				// flush images in case we do more than 1 page so only contains
				// images from current page
				this.decode_pdf.flushObjectValues(true);

				// make the current page the last one so the loop ends after this iteration
				if (this.exitRequested) {
					this.end = this.page;
				}
			}
		}
		catch (Exception e) {

			this.decode_pdf.closePdfFile();
			// keep the original exception as the cause so the stack trace is not lost
			throw new RuntimeException("Exception " + e.getMessage() + " on File=" + file_name, e);
		}

		if (createTemplate) {
			GenericFontMapper.createXMLTemplate(saveTemplateFileName);
		}
	}

	/**
	 * Copies the navigation bar icons from the classpath into the given directory.
	 * 
	 * @param iconDir
	 *            existing directory to copy the icons into
	 * @throws IOException
	 *             if an icon is missing from the classpath or cannot be written
	 */
	private void copyIcons(File iconDir) throws IOException {

		String[] images = new String[] { "smstart.gif", "smback.gif", "smfback.gif", "smforward.gif", "smfforward.gif", "smend.gif", "logo.gif" };

		byte[] buffer = new byte[65536];

		for (String image : images) {

			InputStream is = getClass().getResourceAsStream("/org/jpedal/examples/javafx/icons/" + image);
			if (is == null) throw new IOException("Icon resource " + image + " not found on classpath");

			try {
				BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(iconDir + this.separator + image));
				try {
					int length;
					while ((length = is.read(buffer)) > 0) {
						os.write(buffer, 0, length);
					}
				}
				finally {
					os.close();
				}
			}
			finally {
				is.close();
			}
		}
	}

	// ////////////////////////////////////////////////////////////////////////
	/**
	 * Command line entry point: hands the arguments to the {@link #ExtractPagesAsJavaFX(String[])} constructor, which performs the
	 * whole conversion.
	 * 
	 * @param args
	 *            file or directory name, optional output directory and optional password
	 */
	public static void main(String[] args) {

		if (outputMessages) {
			System.out.println("Simple demo to extract JavaFX version of a page");
		}

		new ExtractPagesAsJavaFX(args);
	}

	/**
	 * Reads the command line values into this object.
	 * <p>
	 * With no arguments the usage text is shown. With 1 to 5 arguments the first is the input name and each further value is taken as
	 * the output directory if it ends with / or \ and as the password otherwise. With 6 or more arguments nothing is read and the
	 * built-in sample file is used.
	 * 
	 * @param args
	 *            command line arguments
	 * @return name of the file or directory to process
	 */
	private String setParams(String[] args) {

		final int count = args.length;

		// nothing passed in - show the usage text
		if (count == 0) {
			showCommandLineValues();
			return this.test_file;
		}

		// too many values - fall back to the default sample file
		if (count >= 6) {
			return this.test_file;
		}

		// remaining values: assume password if no / or \ at the end
		for (int j = 1; j < count; j++) {
			final String value = args[j];
			final boolean looksLikeDirectory = value.endsWith("/") || value.endsWith("\\");

			if (looksLikeDirectory) {
				this.output_dir = value;
			}
			else {
				this.password = value;
			}
		}

		return args[0];
	}

	/**
	 * Prints the command line usage to System.out and then terminates the JVM with exit code 0.
	 */
	private static void showCommandLineValues() {

		System.out.println("Example takes 2 or 3 parameters");
		System.out.println("Value 1 is the file name or directory of PDF files to process");
		// "path", not "pass": this value is the output directory, the password is value 3
		System.out.println("Value 2 is the path to write out JavaFX and directories (must end with / or \\ character)");
		System.out.println("Value 3 (optional) password for PDF file");

		System.exit(0);
	}

	/**
	 * @return the root output directory currently set (null if none has been set yet)
	 */
	public String getOutputDir() {
		return output_dir;
	}

	/**
	 * Used by IDEs to stop before the end of the file: sets the exit flag, which the conversion loop checks after each page.
	 */
	public void stopConversion() {
		exitRequested = true;
	}

	/**
	 * Removes every character which is not an ASCII letter, an ASCII digit or an underscore. The order of the remaining characters is
	 * kept. Note that the result may be empty or start with a digit.
	 * 
	 * @param input
	 *            text to clean up (must not be null)
	 * @return the stripped out, java coding friendly, version of input
	 */
	protected static String getStrippedText(String input) {

		// StringBuilder avoids creating a new String for every kept character
		StringBuilder output = new StringBuilder(input.length());

		for (int i = 0; i < input.length(); i++) {
			char c = input.charAt(i);

			boolean isDigit = c >= '0' && c <= '9';
			boolean isLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');

			if (isDigit || isLetter || c == '_') {
				output.append(c);
			}
		}

		return output.toString();
	}

}