All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpedal.examples.viewer.commands.ExtractText Maven / Gradle / Ivy

There is a newer version: 20151002
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2015 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * ExtractText.java
 * ---------------
 */
package org.jpedal.examples.viewer.commands;

import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.Component;
import java.awt.Container;
import java.awt.Dimension;
import java.awt.Font;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.util.Enumeration;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.AbstractButton;
import javax.swing.BorderFactory;
import javax.swing.ButtonGroup;
import javax.swing.JButton;
import javax.swing.JDialog;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JRadioButton;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextPane;
import javax.swing.JViewport;
import javax.swing.SpringLayout;
import javax.swing.SwingUtilities;
import javax.swing.border.EtchedBorder;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.StyleConstants;
import org.jpedal.PdfDecoderInt;
import org.jpedal.examples.viewer.Values;
import org.jpedal.examples.viewer.commands.generic.GUICopy;
import org.jpedal.examples.viewer.commands.generic.GUIExtractText;
import org.jpedal.exception.PdfException;
import org.jpedal.gui.GUIFactory;
import org.jpedal.objects.PdfPageData;
import org.jpedal.utils.Messages;

/**
 * Handles the pop-up dialogs created when the user right-clicks
 * highlighted text and chooses text extraction.
 */
public class ExtractText extends GUIExtractText {

    /**
     * Entry point for the extract-text command.
     *
     * The extraction dialog is opened only when {@code args} is {@code null};
     * a call that supplies an argument array currently does nothing.
     *
     * @param args         command arguments, or {@code null} for the interactive dialog
     * @param currentGUI   GUI used to show dialogs and to position the windows
     * @param decode_pdf   decoder the text is extracted from
     * @param commonValues shared viewer state (current page, selection co-ordinates)
     */
    public static void execute(final Object[] args, final GUIFactory currentGUI, final PdfDecoderInt decode_pdf, final Values commonValues) {
        if (args != null) {
            // no argument-driven mode is implemented
            return;
        }

        extractSelectedText(currentGUI, decode_pdf, commonValues);
    }

    /**
     * Links the GUI to the text extraction functions.
     *
     * Shows a small options window in which the user chooses a grouping mode
     * (rectangle, table or word list) and an output format, with a live preview
     * of the result. Pressing the extract button runs the extraction over every
     * highlighted area of the current page and shows the combined result in a
     * modal dialog.
     *
     * Returns without showing anything when extraction is not allowed (after a
     * message dialog) or when no highlighted areas are returned for the page.
     *
     * @param currentGUI   GUI used to show dialogs and to position the windows
     * @param decode_pdf   decoder the text is extracted from
     * @param commonValues shared viewer state (current page, selection co-ordinates)
     */
    private static void extractSelectedText(final GUIFactory currentGUI, final PdfDecoderInt decode_pdf, final Values commonValues) {

        if (!decode_pdf.isExtractionAllowed()) {
            currentGUI.showMessageDialog("Not allowed");
            return;
        }

        final int[][] highlights = decode_pdf.getTextLines().getHighlightedAreasAs2DArray(commonValues.getCurrentPage());
        if (highlights == null) {
            // no highlighted areas were returned for this page
            return;
        }

        // Window gui components
        final JScrollPane examplePane = new JScrollPane();
        final JPanel display_value = new JPanel();
        final ButtonGroup group = new ButtonGroup();
        final JRadioButton text = new JRadioButton("Extract as Text");
        final JRadioButton xml = new JRadioButton("Extract  as  XML");
        final JRadioButton rectangleGrouping = new JRadioButton(Messages.getMessage("PdfViewerRect.label"));
        final JRadioButton tableGrouping = new JRadioButton(Messages.getMessage("PdfViewerTable.label"));
        final JRadioButton wordListExtraction = new JRadioButton(Messages.getMessage("PdfViewerWordList.label"));
        final SpringLayout layout = new SpringLayout();
        final JFrame extractionFrame = new JFrame(Messages.getMessage("PdfViewerCoords.message")
                + ' ' + commonValues.m_x1
                + " , " + commonValues.m_y1 + " , " + (commonValues.m_x2 - commonValues.m_x1) + " , " + (commonValues.m_y2 - commonValues.m_y1));
        extractionFrame.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);

        final JLabel demoMessage = new JLabel("                         ");
        final ButtonGroup type = new ButtonGroup();
        final Object[] options = {Messages.getMessage("PdfViewerHelpMenu.text"),
            Messages.getMessage("PdfViewerCancel.text"),
            Messages.getMessage("PdfViewerextract.text")};
        final JButton help = new JButton((String) options[0]);
        final JButton cancel = new JButton((String) options[1]);
        final JButton extract = new JButton((String) options[2]);
        display_value.setLayout(layout);

        // Rebuilds the preview area whenever an option is changed.
        final Runnable r = new Runnable() {
            /** "No example" label added by the previous refresh, if any. */
            private JLabel placeholder;

            @Override
            public void run() {
                final Enumeration<AbstractButton> en = group.getElements();
                while (en.hasMoreElements()) { // find the selected grouping button
                    final AbstractButton button = en.nextElement();
                    if (!button.isSelected()) {
                        continue;
                    }

                    // discard whatever the previous refresh displayed
                    for (final Component old : display_value.getComponents()) {
                        if (old instanceof JScrollPane) {
                            display_value.remove(old);
                        }
                    }
                    if (placeholder != null) {
                        // without this, stale labels pile up on top of later previews
                        display_value.remove(placeholder);
                        placeholder = null;
                    }

                    try {
                        final JScrollPane scroll = updateExtractionExample(decode_pdf, commonValues, currentGUI, button, xml.isSelected());
                        if (scroll != null) {
                            layout.putConstraint(SpringLayout.EAST, scroll, -5, SpringLayout.EAST, display_value);
                            layout.putConstraint(SpringLayout.NORTH, scroll, 5, SpringLayout.SOUTH, tableGrouping);
                            display_value.add(scroll);
                        } else {
                            final JLabel noExample = new JLabel("No Example Available");

                            Font exampleFont = noExample.getFont();
                            exampleFont = exampleFont.deriveFont(exampleFont.getStyle(), 20f);//change as ME has no deriveFont(size only)

                            noExample.setFont(exampleFont);
                            noExample.setForeground(Color.RED);

                            layout.putConstraint(SpringLayout.EAST, noExample, -75, SpringLayout.EAST, display_value);
                            layout.putConstraint(SpringLayout.NORTH, noExample, 50, SpringLayout.SOUTH, tableGrouping);
                            display_value.add(noExample);
                            placeholder = noExample;
                        }
                    } catch (final PdfException ex) {
                        Logger.getLogger(ExtractText.class.getName()).log(Level.SEVERE, null, ex);
                    }

                    break;
                }

                // lay out and redraw the panel so the new preview is shown
                display_value.revalidate();
                display_value.repaint();
            }
        };

        //Add demo message to the bottom of the display
        demoMessage.setFont(new Font("SansSerif", Font.BOLD, 10));
        demoMessage.setForeground(Color.red);
        layout.putConstraint(SpringLayout.WEST, demoMessage, 5, SpringLayout.WEST, display_value);
        layout.putConstraint(SpringLayout.SOUTH, demoMessage, -5, SpringLayout.SOUTH, display_value);
        display_value.add(demoMessage);

        // Add grouping buttons to the top of the display.
        // Only the first button is pre-selected: ButtonGroup.add deselects any
        // further selected button, so rectangle grouping is the initial mode.

        //Rectangle grouping
        rectangleGrouping.setSelected(true);
        rectangleGrouping.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                xml.setText("Extract  as  XML");
                text.setText("Extract as Text");
                SwingUtilities.invokeLater(r);
            }
        });
        group.add(rectangleGrouping);
        rectangleGrouping.setToolTipText(Messages.getMessage("PdfViewerRect.message"));
        layout.putConstraint(SpringLayout.WEST, rectangleGrouping, 10, SpringLayout.WEST, display_value);
        layout.putConstraint(SpringLayout.NORTH, rectangleGrouping, 5, SpringLayout.NORTH, display_value);
        display_value.add(rectangleGrouping);

        //Table Grouping (relabels the format buttons to match the table output)
        tableGrouping.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                xml.setText("Extract as XHTML");
                text.setText("Extract as CSV");
                SwingUtilities.invokeLater(r);
            }
        });
        group.add(tableGrouping);
        tableGrouping.setToolTipText(Messages.getMessage("PdfViewerTable.message"));
        layout.putConstraint(SpringLayout.WEST, tableGrouping, 50, SpringLayout.EAST, rectangleGrouping);
        layout.putConstraint(SpringLayout.NORTH, tableGrouping, 5, SpringLayout.NORTH, display_value);
        display_value.add(tableGrouping);

        //WordList Grouping
        wordListExtraction.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                xml.setText("Extract  as  XML");
                text.setText("Extract as Text");
                SwingUtilities.invokeLater(r);
            }
        });
        group.add(wordListExtraction);
        wordListExtraction.setToolTipText(Messages.getMessage("PdfViewerWordList.message"));
        layout.putConstraint(SpringLayout.EAST, wordListExtraction, -5, SpringLayout.EAST, display_value);
        layout.putConstraint(SpringLayout.NORTH, wordListExtraction, 5, SpringLayout.NORTH, display_value);
        display_value.add(wordListExtraction);

        //Add example pane to the window
        examplePane.setPreferredSize(new Dimension(315, 150));
        examplePane.setMinimumSize(new Dimension(315, 150));
        layout.putConstraint(SpringLayout.EAST, examplePane, -5, SpringLayout.EAST, display_value);
        layout.putConstraint(SpringLayout.NORTH, examplePane, 5, SpringLayout.SOUTH, tableGrouping);
        display_value.add(examplePane);

        //Add xml and text radio buttons
        type.add(xml);
        type.add(text);
        xml.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                SwingUtilities.invokeLater(r);
            }
        });
        text.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                SwingUtilities.invokeLater(r);
            }
        });
        text.setSelected(true);
        layout.putConstraint(SpringLayout.WEST, xml, 5, SpringLayout.WEST, display_value);
        layout.putConstraint(SpringLayout.SOUTH, xml, -5, SpringLayout.NORTH, extract);
        display_value.add(xml);
        layout.putConstraint(SpringLayout.EAST, text, -5, SpringLayout.EAST, display_value);
        layout.putConstraint(SpringLayout.SOUTH, text, -5, SpringLayout.NORTH, extract);
        display_value.add(text);

        //Add the bottom buttons. Extract, Help and Cancel
        layout.putConstraint(SpringLayout.SOUTH, extract, -5, SpringLayout.NORTH, demoMessage);
        layout.putConstraint(SpringLayout.EAST, extract, -5, SpringLayout.EAST, display_value);
        display_value.add(extract);
        layout.putConstraint(SpringLayout.SOUTH, cancel, -5, SpringLayout.NORTH, demoMessage);
        layout.putConstraint(SpringLayout.EAST, cancel, -5, SpringLayout.WEST, extract);
        display_value.add(cancel);
        layout.putConstraint(SpringLayout.SOUTH, help, -5, SpringLayout.NORTH, demoMessage);
        layout.putConstraint(SpringLayout.EAST, help, -5, SpringLayout.WEST, cancel);
        display_value.add(help);

        help.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                final JTextArea info = new JTextArea(Messages.getMessage("PdfViewerGroupingInfo.message"));

                currentGUI.showMessageDialog(info);
            }
        });
        cancel.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                extractionFrame.setVisible(false);
                extractionFrame.dispose();
            }
        });
        extract.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                try {
                    final StringBuilder finalValue = new StringBuilder(); // total data extracted so far

                    final PdfPageData page_data = decode_pdf.getPdfPageData();
                    final int page = commonValues.getCurrentPage();

                    // Edges of the crop box. The far edges are origin + size, the
                    // same bounds the preview uses, so preview and extraction agree
                    // even when the crop box does not start at 0,0.
                    final int minX = page_data.getCropBoxX(page);
                    final int minY = page_data.getCropBoxY(page);
                    final int maxX = minX + page_data.getCropBoxWidth(page);
                    final int maxY = minY + page_data.getCropBoxHeight(page);

                    for (int t = 0; t != highlights.length; t++) {
                        if (highlights[t] == null) {
                            continue;
                        }

                        // Work on a copy and keep the result local: the captured
                        // array is reused on every click, so writing the adjusted
                        // rectangle back would compound the adjustment.
                        final int[] area = GUICopy.adjustHighlightForExtraction(highlights[t].clone());

                        // ensure co-ords are in the right order (x1 <= x2, y1 >= y2)
                        final int left = Math.min(area[0], area[0] + area[2]);
                        final int right = Math.max(area[0], area[0] + area[2]);
                        final int top = Math.max(area[1], area[1] + area[3]);
                        final int bottom = Math.min(area[1], area[1] + area[3]);

                        // keep the rectangle inside the crop box
                        final int t_x1 = Math.min(Math.max(left, minX), maxX);
                        final int t_x2 = Math.min(Math.max(right, minX), maxX);
                        final int t_y1 = Math.min(Math.max(top, minY), maxY);
                        final int t_y2 = Math.min(Math.max(bottom, minY), maxY);

                        if (rectangleGrouping.isSelected()) {
                            //text extraction, one line per highlighted area
                            final String extractedText = extractTextRectangle(commonValues, decode_pdf, currentGUI, xml.isSelected(), t_x1, t_x2, t_y1, t_y2);
                            if (extractedText != null) {
                                finalValue.append(extractedText).append((char) 0x0D).append((char) 0x0A);
                            }
                        } else if (tableGrouping.isSelected()) {
                            //text table extraction
                            final String extractedText = extractTextTable(commonValues, decode_pdf, xml.isSelected(), t_x1, t_x2, t_y1, t_y2);
                            if (extractedText != null) {
                                finalValue.append(extractedText);
                            }
                        } else if (wordListExtraction.isSelected()) {
                            //text wordlist extraction
                            final String extractedText = extractTextList(decode_pdf, commonValues, currentGUI, xml.isSelected(), t_x1, t_x2, t_y1, t_y2);
                            if (extractedText != null) {
                                finalValue.append(extractedText);
                            }
                        }
                    }

                    //Once all data is collected, produce the output window
                    showExtractionResult(finalValue.toString(), currentGUI, commonValues);
                } catch (final PdfException e1) {
                    Logger.getLogger(ExtractText.class.getName()).log(Level.SEVERE, null, e1);
                }
            }
        });

        //Add display panel to the extraction options window
        extractionFrame.getContentPane().add(display_value, BorderLayout.CENTER);
        extractionFrame.setSize(350, 300);

        //Initialise example
        SwingUtilities.invokeLater(r);

        //Set location over window
        extractionFrame.setLocationRelativeTo((Container)currentGUI.getFrame());
        extractionFrame.setResizable(false);

        //Display
        extractionFrame.setVisible(true);

    }

    /**
     * Shows the extracted content in a modal, resizable dialog with two buttons
     * that both simply close it.
     *
     * @param content      text to display; always passed through the tag colouring of createPane
     * @param currentGUI   GUI whose frame the dialog is positioned against
     * @param commonValues shared viewer state, consulted for the mode of operation
     */
    private static void showExtractionResult(final String content, final GUIFactory currentGUI, final Values commonValues) {

        //Create scrollpane containing the final data (an empty one if the pane cannot be filled)
        JScrollPane scroll = new JScrollPane();
        try {
            scroll = createPane(new JTextPane(), content, true);
        } catch (final BadLocationException ex) {
            Logger.getLogger(ExtractText.class.getName()).log(Level.SEVERE, null, ex);
        }
        scroll.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
        scroll.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED);
        scroll.setPreferredSize(new Dimension(400, 400));

        // Create a resizeable pop-up for content
        final JDialog displayFrame = new JDialog((JFrame) null, true);
        displayFrame.setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE);
        if (commonValues.getModeOfOperation() != Values.RUNNING_APPLET) {
            final Container frame = (Container)currentGUI.getFrame();
            displayFrame.setLocation(frame.getLocationOnScreen().x + 10, frame.getLocationOnScreen().y + 10);
        }
        displayFrame.setSize(450, 450);
        displayFrame.setTitle(Messages.getMessage("PdfViewerExtractedText.menu"));
        displayFrame.getContentPane().setLayout(new BorderLayout());
        displayFrame.getContentPane().add(scroll, BorderLayout.CENTER);

        //Add buttons
        final JPanel buttonBar = new JPanel();
        buttonBar.setLayout(new BorderLayout());
        displayFrame.getContentPane().add(buttonBar, BorderLayout.SOUTH);

        // "return" button on the left: closes the dialog
        final JButton yes = new JButton(Messages.getMessage("PdfViewerMenu.return"));
        yes.setFont(new Font("SansSerif", Font.PLAIN, 12));
        buttonBar.add(yes, BorderLayout.WEST);
        yes.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                displayFrame.dispose();
            }
        });

        // "exit" button on the right: also just closes the dialog
        final JButton no = new JButton(Messages.getMessage("PdfViewerFileMenuExit.text"));
        no.setFont(new Font("SansSerif", Font.PLAIN, 12));
        buttonBar.add(no, BorderLayout.EAST);
        no.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                displayFrame.dispose();
            }
        });

        // show the popup (blocks until it is closed because the dialog is modal)
        displayFrame.setVisible(true);
    }

    /**
     * Fills the supplied text pane with the given content and wraps it in a
     * scroll pane with a vertical scrollbar only.
     *
     * When {@code useXML} is set and content is present, anything that looks
     * like a tag ("&lt;", the text after it and the closing "&gt;") is inserted
     * in blue and everything else in black. Otherwise the content is inserted
     * unchanged in black.
     *
     * @param text_pane pane to configure and fill; it is left editable
     * @param content   text to show, may be {@code null}
     * @param useXML    whether to colour tag-like runs
     * @return a scroll pane whose viewport holds {@code text_pane}
     * @throws BadLocationException if the text cannot be inserted into the pane's document
     */
    private static JScrollPane createPane(final JTextPane text_pane, final String content, final boolean useXML) throws BadLocationException {

        text_pane.setEditable(true);
        text_pane.setFont(new Font("Lucida", Font.PLAIN, 14));

        text_pane.setToolTipText(Messages.getMessage("PdfViewerTooltip.text"));
        final Document doc = text_pane.getDocument();
        text_pane.setBorder(BorderFactory.createTitledBorder(new EtchedBorder(), Messages.getMessage("PdfViewerTitle.text")));
        text_pane.setForeground(Color.black);

        final SimpleAttributeSet token_attribute = new SimpleAttributeSet();
        final SimpleAttributeSet text_attribute = new SimpleAttributeSet();
        final SimpleAttributeSet plain_attribute = new SimpleAttributeSet();
        StyleConstants.setForeground(token_attribute, Color.blue);
        StyleConstants.setForeground(text_attribute, Color.black);
        StyleConstants.setForeground(plain_attribute, Color.black);
        int pointer = 0;

        // put content in and colour XML
        if (useXML && content != null) {
            // split on the angle brackets, keeping them as tokens of their own
            final StringTokenizer data_As_tokens = new StringTokenizer(content, "<>", true);

            while (data_As_tokens.hasMoreTokens()) {
                final String next_item = data_As_tokens.nextToken();

                if (next_item.equals("<") && data_As_tokens.hasMoreTokens()) {

                    // A tag is "<", its body and ">". Take at most two further
                    // tokens and stop at ">" or at the end of the input, so a
                    // stray "<" in ordinary text (e.g. "a<b") or an empty "<>"
                    // cannot run the tokenizer past its last token.
                    final StringBuilder current_token = new StringBuilder(next_item);
                    for (int taken = 0; taken < 2 && data_As_tokens.hasMoreTokens(); taken++) {
                        final String part = data_As_tokens.nextToken();
                        current_token.append(part);
                        if (part.equals(">")) {
                            break;
                        }
                    }

                    doc.insertString(pointer, current_token.toString(), token_attribute);
                    pointer += current_token.length();

                } else {
                    doc.insertString(pointer, next_item, text_attribute);
                    pointer += next_item.length();
                }
            }
        } else {
            doc.insertString(pointer, content, plain_attribute);
        }

        //wrap in scrollpane
        final JScrollPane text_scroll = new JScrollPane();
        text_scroll.getViewport().add(text_pane);
        text_scroll.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_NEVER);
        text_scroll.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED);
        return text_scroll;
    }

    /**
     * Builds the read-only preview shown in the extraction options window.
     *
     * Runs the extraction that belongs to the given grouping button over every
     * highlighted area of the current page, clamped to the page's crop box, and
     * puts the combined result into a non-editable text pane.
     *
     * @param decode_pdf   decoder the text is extracted from
     * @param commonValues shared viewer state, used for the current page
     * @param currentGUI   GUI handed on to the extraction helpers
     * @param button       the selected grouping button; its label decides the extraction mode
     * @param xml          whether the XML/XHTML flavour of the output is requested
     * @return a scroll pane holding the preview, or {@code null} when nothing was extracted
     * @throws PdfException declared by this method; presumably raised by the extraction helpers
     */
    private static JScrollPane updateExtractionExample(final PdfDecoderInt decode_pdf, final Values commonValues, final GUIFactory currentGUI, final AbstractButton button, final boolean xml) throws PdfException {

        final PdfPageData page_data = decode_pdf.getPdfPageData();
        final int page = commonValues.getCurrentPage();

        // edges of the crop box the highlights are clamped to
        final int minX = page_data.getCropBoxX(page);
        final int minY = page_data.getCropBoxY(page);
        final int maxX = minX + page_data.getCropBoxWidth(page);
        final int maxY = minY + page_data.getCropBoxHeight(page);

        final int[][] highlights = decode_pdf.getTextLines().getHighlightedAreasAs2DArray(page);

        // The buttons are labelled from the message bundle, so match the
        // localised label as well as the English text that was compared before.
        final boolean asTable = isGroupingButton(button, "PdfViewerTable.label", "Table");
        final boolean asRectangle = isGroupingButton(button, "PdfViewerRect.label", "Rectangle");
        final boolean asWordList = isGroupingButton(button, "PdfViewerWordList.label", "WordList");

        final StringBuilder finalString = new StringBuilder();

        if (highlights != null) {
            for (int t = 0; t != highlights.length; t++) {
                if (highlights[t] == null) {
                    continue;
                }

                // keep the adjusted rectangle local instead of writing it back
                // into the array obtained from the decoder
                final int[] area = GUICopy.adjustHighlightForExtraction(highlights[t]);

                // ensure co-ords are in the right order (x1 <= x2, y1 >= y2)
                final int left = Math.min(area[0], area[0] + area[2]);
                final int right = Math.max(area[0], area[0] + area[2]);
                final int top = Math.max(area[1], area[1] + area[3]);
                final int bottom = Math.min(area[1], area[1] + area[3]);

                // keep the rectangle inside the crop box
                final int t_x1 = Math.min(Math.max(left, minX), maxX);
                final int t_x2 = Math.min(Math.max(right, minX), maxX);
                final int t_y1 = Math.min(Math.max(top, minY), maxY);
                final int t_y2 = Math.min(Math.max(bottom, minY), maxY);

                if (asTable) {
                    finalString.append(extractTextTable(commonValues, decode_pdf, xml, t_x1, t_x2, t_y1, t_y2));
                }

                if (asRectangle) {
                    finalString.append(extractTextRectangle(commonValues, decode_pdf, currentGUI, xml, t_x1, t_x2, t_y1, t_y2)).append(' ');
                }

                if (asWordList) {
                    finalString.append(extractTextList(decode_pdf, commonValues, currentGUI, xml, t_x1, t_x2, t_y1, t_y2));
                }
            }
        }

        if (finalString.length() == 0) {
            return null;
        }

        // falls back to an empty scroll pane if the text pane cannot be filled
        JScrollPane scroll = new JScrollPane();
        final JTextPane preview = new JTextPane();
        try {
            scroll = createPane(preview, finalString.toString(), xml);
        } catch (final BadLocationException e) {
            Logger.getLogger(ExtractText.class.getName()).log(Level.SEVERE, null, e);
        }

        scroll.setPreferredSize(new Dimension(315, 150));
        scroll.setMinimumSize(new Dimension(315, 150));

        // createPane leaves the pane editable; the preview must not be
        preview.setEditable(false);

        return scroll;
    }

    /**
     * Tells whether a button carries the label of a given grouping mode.
     *
     * @param button      button to inspect
     * @param messageKey  message-bundle key the button's label was created from
     * @param englishText English label that was compared against previously
     * @return {@code true} if the button text equals the bundle text for the key or the English text
     */
    private static boolean isGroupingButton(final AbstractButton button, final String messageKey, final String englishText) {
        final String label = button.getText();
        if (label == null) {
            return false;
        }
        return label.equals(englishText) || label.equals(Messages.getMessage(messageKey));
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy