All downloads are free. Search and download functionality uses the official Maven repository.

technology.tabula.Page Maven / Gradle / Ivy

There is a newer version: 1.0.5
Show newest version
package technology.tabula;

import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDPage;

/**
 * A rectangular region of a PDF page together with the text elements and
 * ruling lines associated with it.
 *
 * <p>A {@code Page} can describe a whole page or a sub-area of one: see
 * {@link #getArea(Rectangle)}, which builds a new {@code Page} restricted to
 * the given rectangle.</p>
 *
 * <p>Processed rulings are computed lazily by {@link #getRulings()} and cached;
 * {@link #addRuling(Ruling)} invalidates those caches.</p>
 */
@SuppressWarnings("serial")
// TODO: this class should probably be called "PageArea" or something like that
public class Page extends Rectangle {

    private Integer rotation;
    private int pageNumber;
    /** Text elements of this page/area; {@code null} when built with the 7-argument constructor. */
    private List<TextElement> texts;
    /**
     * {@code rulings} holds the unprocessed rulings ({@code null} when built with the
     * 7-argument constructor). The other three lists are caches filled by
     * {@link #getRulings()} and reset to {@code null} by {@link #addRuling(Ruling)}.
     */
    private List<Ruling> rulings, cleanRulings = null, verticalRulingLines = null, horizontalRulingLines = null;
    private float minCharWidth;
    private float minCharHeight;
    // NOTE(review): declared without a type argument, as in this file. If
    // RectangleSpatialIndex is a generic type this should presumably be
    // RectangleSpatialIndex<TextElement> -- confirm against its declaration.
    private RectangleSpatialIndex spatial_index;
    private PDPage pdPage;

    /**
     * Creates a page without text, rulings or a spatial index.
     *
     * @param top         top coordinate of the page rectangle
     * @param left        left coordinate of the page rectangle
     * @param width       width of the page rectangle
     * @param height      height of the page rectangle
     * @param rotation    rotation of the page
     * @param page_number number of the page
     * @param pdPage      the underlying PDFBox page
     */
    public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage) {
        super(top, left, width, height);
        this.rotation = rotation;
        this.pageNumber = page_number;
        this.pdPage = pdPage;
    }

    /**
     * Creates a page with text and rulings but without a spatial index.
     *
     * <p>The given lists are stored as-is (not copied); {@link #addRuling(Ruling)}
     * appends to the {@code rulings} list passed here.</p>
     *
     * @param characters text elements of the page
     * @param rulings    unprocessed rulings of the page
     */
    public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage,
            List<TextElement> characters, List<Ruling> rulings) {

        this(top, left, width, height, rotation, page_number, pdPage);
        this.texts = characters;
        this.rulings = rulings;
    }

    /**
     * Creates a fully populated page.
     *
     * @param characters    text elements of the page
     * @param rulings       unprocessed rulings of the page
     * @param minCharWidth  minimum character width, passed to {@code Utils.snapPoints} by {@link #getRulings()}
     * @param minCharHeight minimum character height, passed to {@code Utils.snapPoints} by {@link #getRulings()}
     * @param index         spatial index queried by {@link #getText(Rectangle)}
     */
    public Page(float top, float left, float width, float height, int rotation, int page_number, PDPage pdPage,
            List<TextElement> characters, List<Ruling> rulings,
            float minCharWidth, float minCharHeight, RectangleSpatialIndex index) {

        this(top, left, width, height, rotation, page_number, pdPage, characters, rulings);
        this.minCharHeight = minCharHeight;
        this.minCharWidth = minCharWidth;
        this.spatial_index = index;
    }

    /**
     * Builds a new {@code Page} restricted to {@code area}.
     *
     * <p>The result carries the text returned by {@link #getText(Rectangle)} for the
     * area, this page's processed rulings cropped to the area, and four extra
     * rulings along the edges of the area. It shares this page's rotation, page
     * number, PDFBox page and spatial index.</p>
     *
     * @param area the region to extract
     * @return a new page describing {@code area}
     */
    public Page getArea(Rectangle area) {
        List<TextElement> areaText = getText(area);

        // Fallback dimensions used when the area contains no text at all.
        float areaMinCharWidth = 7;
        float areaMinCharHeight = 7;
        if (!areaText.isEmpty()) {
            areaMinCharWidth = Collections.min(areaText, narrowestFirst()).width;
            areaMinCharHeight = Collections.min(areaText, shortestFirst()).height;
        }

        Page rv = new Page(
                area.getTop(),
                area.getLeft(),
                (float) area.getWidth(),
                (float) area.getHeight(),
                rotation,
                pageNumber,
                pdPage,
                areaText,
                Ruling.cropRulingsToArea(getRulings(), area),
                areaMinCharWidth,
                areaMinCharHeight,
                spatial_index);

        // Frame the area with rulings along its top, right, bottom and left edges.
        rv.addRuling(edge(rv.getLeft(), rv.getTop(), rv.getRight(), rv.getTop()));
        rv.addRuling(edge(rv.getRight(), rv.getTop(), rv.getRight(), rv.getBottom()));
        rv.addRuling(edge(rv.getRight(), rv.getBottom(), rv.getLeft(), rv.getBottom()));
        rv.addRuling(edge(rv.getLeft(), rv.getBottom(), rv.getLeft(), rv.getTop()));

        return rv;
    }

    /**
     * Convenience overload of {@link #getArea(Rectangle)} taking the four edges of the area.
     */
    public Page getArea(float top, float left, float bottom, float right) {
        Rectangle area = new Rectangle(top, left, right - left, bottom - top);
        return this.getArea(area);
    }

    /**
     * @return the text elements this page was constructed with; may be {@code null}
     *         if the page was built without text
     */
    public List<TextElement> getText() {
        return texts;
    }

    /**
     * Returns what the spatial index reports as contained in {@code area}.
     *
     * <p>Requires that this page was constructed with a spatial index.</p>
     */
    public List<TextElement> getText(Rectangle area) {
        // The index is declared without a type argument in this file, so the
        // conversion to List<TextElement> is unchecked.
        @SuppressWarnings("unchecked")
        List<TextElement> contained = this.spatial_index.contains(area);
        return contained;
    }

    /** @deprecated use {@linkplain #getText(Rectangle)} instead  */
    @Deprecated public List<TextElement> getText(float top, float left, float bottom, float right) {
        return this.getText(new Rectangle(top, left, right - left, bottom - top));
    }

    public Integer getRotation() {
        return rotation;
    }

    public int getPageNumber() {
        return pageNumber;
    }

    /** @deprecated use {@linkplain #getText()} instead  */
    @Deprecated public List<TextElement> getTexts() {
        return texts;
    }

    /**
     * Returns the minimum bounding box that contains all the TextElements on this Page,
     * or an empty {@code Rectangle} when the page has no text.
     */
    public Rectangle getTextBounds() {
        List<TextElement> texts = this.getText();
        // texts is null for pages built without text; treat that like "no text".
        if (texts != null && !texts.isEmpty()) {
            return Utils.bounds(texts);
        }
        return new Rectangle();
    }

    /**
     * Returns the processed rulings of this page: the vertical rulings followed by
     * the horizontal ones, each group passed through
     * {@code Ruling.collapseOrientedRulings}.
     *
     * <p>The result is cached until {@link #addRuling(Ruling)} is called. When the
     * page has no rulings a new empty list is returned on every call.</p>
     */
    public List<Ruling> getRulings() {
        if (this.cleanRulings != null) {
            return this.cleanRulings;
        }

        if (this.rulings == null || this.rulings.isEmpty()) {
            this.verticalRulingLines = new ArrayList<>();
            this.horizontalRulingLines = new ArrayList<>();
            return new ArrayList<>();
        }

        // The return value is not used here, so snapPoints presumably adjusts the
        // rulings in place -- verify against Utils.snapPoints.
        Utils.snapPoints(this.rulings, this.minCharWidth, this.minCharHeight);

        List<Ruling> vrs = new ArrayList<>();
        List<Ruling> hrs = new ArrayList<>();
        for (Ruling r : this.rulings) {
            // Two independent checks (not else-if): a ruling reporting both
            // orientations lands in both lists, as with two separate passes.
            if (r.vertical()) {
                vrs.add(r);
            }
            if (r.horizontal()) {
                hrs.add(r);
            }
        }
        this.verticalRulingLines = Ruling.collapseOrientedRulings(vrs);
        this.horizontalRulingLines = Ruling.collapseOrientedRulings(hrs);

        this.cleanRulings = new ArrayList<>(this.verticalRulingLines);
        this.cleanRulings.addAll(this.horizontalRulingLines);

        return this.cleanRulings;
    }

    /** @return the processed vertical rulings, computing them via {@link #getRulings()} if needed */
    public List<Ruling> getVerticalRulings() {
        if (this.verticalRulingLines == null) {
            this.getRulings();
        }
        return this.verticalRulingLines;
    }

    /** @return the processed horizontal rulings, computing them via {@link #getRulings()} if needed */
    public List<Ruling> getHorizontalRulings() {
        if (this.horizontalRulingLines == null) {
            this.getRulings();
        }
        return this.horizontalRulingLines;
    }

    /**
     * Adds a ruling to the unprocessed rulings and invalidates the cached
     * processed rulings.
     *
     * @param r the ruling to add
     * @throws UnsupportedOperationException if {@code r} is oblique
     */
    public void addRuling(Ruling r) {
        if (r.oblique()) {
            throw new UnsupportedOperationException("Can't add an oblique ruling");
        }
        // Pages built with the 7-argument constructor start without a rulings list.
        if (this.rulings == null) {
            this.rulings = new ArrayList<>();
        }
        this.rulings.add(r);
        // clear caches
        this.verticalRulingLines = null;
        this.horizontalRulingLines = null;
        this.cleanRulings = null;
    }

    /**
     * @return the rulings as supplied at construction plus any added through
     *         {@link #addRuling(Ruling)}; may be {@code null} if none were ever set
     */
    public List<Ruling> getUnprocessedRulings() {
        return this.rulings;
    }

    /** @deprecated with no replacement  */
    @Deprecated public float getMinCharWidth() {
        return minCharWidth;
    }

    /** @deprecated with no replacement  */
    @Deprecated public float getMinCharHeight() {
        return minCharHeight;
    }

    public PDPage getPDPage() {
      return pdPage;
    }

    /** @deprecated with no replacement  */
    @Deprecated public RectangleSpatialIndex getSpatialIndex() {
        return this.spatial_index;
    }

    /** @deprecated with no replacement  */
    @Deprecated public boolean hasText() {
        // texts is null for pages built without text.
        return this.texts != null && !this.texts.isEmpty();
    }

    /** Builds the ruling running from (x1, y1) to (x2, y2). */
    private static Ruling edge(double x1, double y1, double x2, double y2) {
        return new Ruling(new Point2D.Double(x1, y1), new Point2D.Double(x2, y2));
    }

    /** Orders text elements by ascending {@code width}. */
    private static Comparator<TextElement> narrowestFirst() {
        return new Comparator<TextElement>() {
            @Override
            public int compare(TextElement te1, TextElement te2) {
                // java.lang.Float is spelled out as in the original code; presumably a
                // nested type named Float is inherited through Rectangle -- keep it qualified.
                return java.lang.Float.compare(te1.width, te2.width);
            }
        };
    }

    /** Orders text elements by ascending {@code height}. */
    private static Comparator<TextElement> shortestFirst() {
        return new Comparator<TextElement>() {
            @Override
            public int compare(TextElement te1, TextElement te2) {
                return java.lang.Float.compare(te1.height, te2.height);
            }
        };
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy